mirror of https://github.com/apache/lucene.git
Ensure values are sent to the codec in sorted order: beef up AssertingCodec and add more simple tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4765@1444653 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2dcf80718c
commit
7bd948623e
|
@ -46,6 +46,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
private int currentDoc;
|
private int currentDoc;
|
||||||
private int currentValues[] = new int[8];
|
private int currentValues[] = new int[8];
|
||||||
private int currentUpto = 0;
|
private int currentUpto = 0;
|
||||||
|
private int maxCount = 0;
|
||||||
|
|
||||||
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
|
@ -83,7 +84,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
updateBytesUsed();
|
updateBytesUsed();
|
||||||
}
|
}
|
||||||
|
|
||||||
// finalize currentDoc
|
// finalize currentDoc: this deduplicates the current term ids
|
||||||
private void finishCurrentDoc() {
|
private void finishCurrentDoc() {
|
||||||
Arrays.sort(currentValues, 0, currentUpto);
|
Arrays.sort(currentValues, 0, currentUpto);
|
||||||
int lastValue = -1;
|
int lastValue = -1;
|
||||||
|
@ -99,6 +100,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
}
|
}
|
||||||
// record the number of unique ords for this doc
|
// record the number of unique ords for this doc
|
||||||
pendingCounts.add(count);
|
pendingCounts.add(count);
|
||||||
|
maxCount = Math.max(maxCount, count);
|
||||||
currentUpto = 0;
|
currentUpto = 0;
|
||||||
currentDoc++;
|
currentDoc++;
|
||||||
}
|
}
|
||||||
|
@ -127,7 +129,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
|
|
||||||
if (currentUpto == currentValues.length) {
|
if (currentUpto == currentValues.length) {
|
||||||
currentValues = ArrayUtil.grow(currentValues, currentValues.length+1);
|
currentValues = ArrayUtil.grow(currentValues, currentValues.length+1);
|
||||||
iwBytesUsed.addAndGet((currentValues.length - currentUpto) * RamUsageEstimator.NUM_BYTES_INT);
|
// reserve additional space for max # values per-doc
|
||||||
|
// when flushing, we need an int[] to sort the mapped-ords within the doc
|
||||||
|
iwBytesUsed.addAndGet((currentValues.length - currentUpto) * 2 * RamUsageEstimator.NUM_BYTES_INT);
|
||||||
}
|
}
|
||||||
|
|
||||||
currentValues[currentUpto] = ord;
|
currentValues[currentUpto] = ord;
|
||||||
|
@ -143,7 +147,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
@Override
|
@Override
|
||||||
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
|
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
|
||||||
final int maxDoc = state.segmentInfo.getDocCount();
|
final int maxDoc = state.segmentInfo.getDocCount();
|
||||||
|
final int maxCountPerDoc = maxCount;
|
||||||
assert pendingCounts.size() == maxDoc;
|
assert pendingCounts.size() == maxDoc;
|
||||||
final int valueCount = hash.size();
|
final int valueCount = hash.size();
|
||||||
|
|
||||||
|
@ -176,7 +180,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
new Iterable<Number>() {
|
new Iterable<Number>() {
|
||||||
@Override
|
@Override
|
||||||
public Iterator<Number> iterator() {
|
public Iterator<Number> iterator() {
|
||||||
return new OrdsIterator(ordMap);
|
return new OrdsIterator(ordMap, maxCountPerDoc);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -221,11 +225,17 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
// iterates over the ords for each doc we have in ram
|
// iterates over the ords for each doc we have in ram
|
||||||
private class OrdsIterator implements Iterator<Number> {
|
private class OrdsIterator implements Iterator<Number> {
|
||||||
final AppendingLongBuffer.Iterator iter = pending.iterator();
|
final AppendingLongBuffer.Iterator iter = pending.iterator();
|
||||||
|
final AppendingLongBuffer.Iterator counts = pendingCounts.iterator();
|
||||||
final int ordMap[];
|
final int ordMap[];
|
||||||
final long numOrds;
|
final long numOrds;
|
||||||
long ordUpto;
|
long ordUpto;
|
||||||
|
|
||||||
OrdsIterator(int ordMap[]) {
|
final int currentDoc[];
|
||||||
|
int currentUpto;
|
||||||
|
int currentLength;
|
||||||
|
|
||||||
|
OrdsIterator(int ordMap[], int maxCount) {
|
||||||
|
this.currentDoc = new int[maxCount];
|
||||||
this.ordMap = ordMap;
|
this.ordMap = ordMap;
|
||||||
this.numOrds = pending.size();
|
this.numOrds = pending.size();
|
||||||
}
|
}
|
||||||
|
@ -240,10 +250,20 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||||
if (!hasNext()) {
|
if (!hasNext()) {
|
||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
}
|
}
|
||||||
int ord = (int) iter.next();
|
if (currentUpto == currentLength) {
|
||||||
|
// refill next doc, and sort remapped ords within the doc.
|
||||||
|
currentUpto = 0;
|
||||||
|
currentLength = (int) counts.next();
|
||||||
|
for (int i = 0; i < currentLength; i++) {
|
||||||
|
currentDoc[i] = ordMap[(int) iter.next()];
|
||||||
|
}
|
||||||
|
Arrays.sort(currentDoc, 0, currentLength);
|
||||||
|
}
|
||||||
|
int ord = currentDoc[currentUpto];
|
||||||
|
currentUpto++;
|
||||||
ordUpto++;
|
ordUpto++;
|
||||||
// TODO: make reusable Number
|
// TODO: make reusable Number
|
||||||
return ordMap[ord];
|
return ord;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -163,6 +163,39 @@ public class TestDemoDocValue extends LuceneTestCase {
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTwoValuesUnordered() throws IOException {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
// Store the index in memory:
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
// To store an index on disk, use this instead:
|
||||||
|
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||||
|
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, analyzer);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
|
||||||
|
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.close();
|
||||||
|
|
||||||
|
// Now search the index:
|
||||||
|
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||||
|
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||||
|
OrdIterator oi = dv.getOrds(0, null);
|
||||||
|
assertEquals(0, oi.nextOrd());
|
||||||
|
assertEquals(1, oi.nextOrd());
|
||||||
|
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||||
|
|
||||||
|
BytesRef bytes = new BytesRef();
|
||||||
|
dv.lookupOrd(0, bytes);
|
||||||
|
assertEquals(new BytesRef("hello"), bytes);
|
||||||
|
|
||||||
|
dv.lookupOrd(1, bytes);
|
||||||
|
assertEquals(new BytesRef("world"), bytes);
|
||||||
|
|
||||||
|
ireader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testThreeValuesTwoDocs() throws IOException {
|
public void testThreeValuesTwoDocs() throws IOException {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
@ -213,4 +246,146 @@ public class TestDemoDocValue extends LuceneTestCase {
|
||||||
ireader.close();
|
ireader.close();
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTwoDocumentsLastMissing() throws IOException {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
// Store the index in memory:
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
// To store an index on disk, use this instead:
|
||||||
|
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||||
|
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||||
|
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
doc = new Document();
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.forceMerge(1);
|
||||||
|
iwriter.close();
|
||||||
|
|
||||||
|
// Now search the index:
|
||||||
|
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||||
|
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||||
|
OrdIterator oi = dv.getOrds(0, null);
|
||||||
|
assertEquals(0, oi.nextOrd());
|
||||||
|
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||||
|
|
||||||
|
BytesRef bytes = new BytesRef();
|
||||||
|
dv.lookupOrd(0, bytes);
|
||||||
|
assertEquals(new BytesRef("hello"), bytes);
|
||||||
|
|
||||||
|
assertEquals(1, dv.getValueCount());
|
||||||
|
|
||||||
|
ireader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTwoDocumentsLastMissingMerge() throws IOException {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
// Store the index in memory:
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
// To store an index on disk, use this instead:
|
||||||
|
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||||
|
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||||
|
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.commit();
|
||||||
|
doc = new Document();
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.forceMerge(1);
|
||||||
|
iwriter.close();
|
||||||
|
|
||||||
|
// Now search the index:
|
||||||
|
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||||
|
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||||
|
OrdIterator oi = dv.getOrds(0, null);
|
||||||
|
assertEquals(0, oi.nextOrd());
|
||||||
|
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||||
|
|
||||||
|
BytesRef bytes = new BytesRef();
|
||||||
|
dv.lookupOrd(0, bytes);
|
||||||
|
assertEquals(new BytesRef("hello"), bytes);
|
||||||
|
|
||||||
|
assertEquals(1, dv.getValueCount());
|
||||||
|
|
||||||
|
ireader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTwoDocumentsFirstMissing() throws IOException {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
// Store the index in memory:
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
// To store an index on disk, use this instead:
|
||||||
|
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||||
|
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||||
|
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||||
|
Document doc = new Document();
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.forceMerge(1);
|
||||||
|
iwriter.close();
|
||||||
|
|
||||||
|
// Now search the index:
|
||||||
|
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||||
|
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||||
|
OrdIterator oi = dv.getOrds(1, null);
|
||||||
|
assertEquals(0, oi.nextOrd());
|
||||||
|
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||||
|
|
||||||
|
BytesRef bytes = new BytesRef();
|
||||||
|
dv.lookupOrd(0, bytes);
|
||||||
|
assertEquals(new BytesRef("hello"), bytes);
|
||||||
|
|
||||||
|
assertEquals(1, dv.getValueCount());
|
||||||
|
|
||||||
|
ireader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTwoDocumentsFirstMissingMerge() throws IOException {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
// Store the index in memory:
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
// To store an index on disk, use this instead:
|
||||||
|
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||||
|
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||||
|
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||||
|
Document doc = new Document();
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.commit();
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.forceMerge(1);
|
||||||
|
iwriter.close();
|
||||||
|
|
||||||
|
// Now search the index:
|
||||||
|
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||||
|
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||||
|
OrdIterator oi = dv.getOrds(1, null);
|
||||||
|
assertEquals(0, oi.nextOrd());
|
||||||
|
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||||
|
|
||||||
|
BytesRef bytes = new BytesRef();
|
||||||
|
dv.lookupOrd(0, bytes);
|
||||||
|
assertEquals(new BytesRef("hello"), bytes);
|
||||||
|
|
||||||
|
assertEquals(1, dv.getValueCount());
|
||||||
|
|
||||||
|
ireader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -130,12 +130,53 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
|
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
|
||||||
// nocommit: add checks
|
long valueCount = 0;
|
||||||
|
BytesRef lastValue = null;
|
||||||
|
for (BytesRef b : values) {
|
||||||
|
assert b != null;
|
||||||
|
assert b.isValid();
|
||||||
|
if (valueCount > 0) {
|
||||||
|
assert b.compareTo(lastValue) > 0;
|
||||||
|
}
|
||||||
|
lastValue = BytesRef.deepCopyOf(b);
|
||||||
|
valueCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int docCount = 0;
|
||||||
|
long ordCount = 0;
|
||||||
|
// nocommit
|
||||||
|
FixedBitSet seenOrds = new FixedBitSet((int)valueCount);
|
||||||
|
Iterator<Number> ordIterator = ords.iterator();
|
||||||
|
for (Number v : docToOrdCount) {
|
||||||
|
assert v != null;
|
||||||
|
int count = v.intValue();
|
||||||
|
assert count >= 0;
|
||||||
|
docCount++;
|
||||||
|
ordCount += count;
|
||||||
|
|
||||||
|
long lastOrd = -1;
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
Number o = ordIterator.next();
|
||||||
|
assert o != null;
|
||||||
|
long ord = o.longValue();
|
||||||
|
assert ord >= 0 && ord < valueCount;
|
||||||
|
assert ord > lastOrd : "ord=" + ord + ",lastOrd=" + lastOrd;
|
||||||
|
seenOrds.set((int)ord); // nocommit
|
||||||
|
lastOrd = ord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert ordIterator.hasNext() == false;
|
||||||
|
|
||||||
|
assert docCount == maxDoc;
|
||||||
|
assert seenOrds.cardinality() == valueCount;
|
||||||
|
checkIterator(values.iterator(), valueCount);
|
||||||
|
checkIterator(docToOrdCount.iterator(), maxDoc);
|
||||||
|
checkIterator(ords.iterator(), ordCount);
|
||||||
in.addSortedSetField(field, values, docToOrdCount, ords);
|
in.addSortedSetField(field, values, docToOrdCount, ords);
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> void checkIterator(Iterator<T> iterator, int expectedSize) {
|
private <T> void checkIterator(Iterator<T> iterator, long expectedSize) {
|
||||||
for (int i = 0; i < expectedSize; i++) {
|
for (long i = 0; i < expectedSize; i++) {
|
||||||
boolean hasNext = iterator.hasNext();
|
boolean hasNext = iterator.hasNext();
|
||||||
assert hasNext;
|
assert hasNext;
|
||||||
T v = iterator.next();
|
T v = iterator.next();
|
||||||
|
|
Loading…
Reference in New Issue