fix failing tests w/ ST codec

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1410945 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-11-18 18:02:42 +00:00
parent b59bf27886
commit d963fb1664
9 changed files with 123 additions and 57 deletions

View File

@@ -174,6 +174,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
@Override
public void add(long value) throws IOException {
assert value >= minValue;
Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
SimpleTextUtil.write(data, encoder.format(delta), scratch);
SimpleTextUtil.writeNewline(data);
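
The delta here is computed with BigInteger rather than plain long subtraction because value - minValue can exceed Long.MAX_VALUE when minValue is strongly negative. A minimal standalone sketch of the failure mode (illustrative only, not part of the patch; class name invented):

import java.math.BigInteger;

public class DeltaOverflowSketch {
  public static void main(String[] args) {
    long minValue = Long.MIN_VALUE;
    long value = Long.MAX_VALUE;
    // Plain long arithmetic wraps: the true delta (2^64 - 1) cannot fit
    // in a signed 64-bit long, so this prints -1.
    System.out.println(value - minValue);
    // BigInteger carries the full-width result: 18446744073709551615.
    System.out.println(BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue)));
  }
}
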
@@ -237,7 +238,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
// nocommit
@Override
public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, final int maxLength) throws IOException {
public SortedDocValuesConsumer addSortedField(FieldInfo field, final int valueCount, boolean fixedLength, final int maxLength) throws IOException {
writeFieldEntry(field);
// write numValues
SimpleTextUtil.write(data, NUMVALUES);
@@ -274,6 +275,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
return new SortedDocValuesConsumer() {
// for asserts:
private int valuesSeen;
@Override
public void addValue(BytesRef value) throws IOException {
@@ -291,6 +295,8 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
data.writeByte((byte)' ');
}
SimpleTextUtil.writeNewline(data);
valuesSeen++;
assert valuesSeen <= valueCount;
}
@Override
@@ -356,11 +362,12 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
final Map<String,OneField> fields = new HashMap<String,OneField>();
SimpleTextDocValuesReader(FieldInfos fieldInfos, Directory dir, SegmentInfo si, IOContext context) throws IOException {
System.out.println("dir=" + dir + " seg=" + si.name);
//System.out.println("dir=" + dir + " seg=" + si.name);
data = dir.openInput(IndexFileNames.segmentFileName(si.name, "", "dat"), context);
maxDoc = si.getDocCount();
while(true) {
readLine();
//System.out.println("READ field=" + scratch.utf8ToString());
if (scratch.equals(END)) {
break;
}
@@ -571,6 +578,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
/** Used only in ctor: */
private void readLine() throws IOException {
SimpleTextUtil.readLine(data, scratch);
//System.out.println("line: " + scratch.utf8ToString());
}
/** Used only in ctor: */

View File

@@ -33,7 +33,7 @@ class BytesDVWriter {
private final BytesRefArray bytesRefArray;
private final FieldInfo fieldInfo;
private int addeValues = 0;
private int addedValues = 0;
private final BytesRef emptyBytesRef = new BytesRef();
// -2 means not set yet; -1 means length isn't fixed;
@@ -56,12 +56,12 @@ class BytesDVWriter {
mergeLength(value.length);
// Fill in any holes:
while(addeValues < docID) {
addeValues++;
while(addedValues < docID) {
addedValues++;
bytesRefArray.append(emptyBytesRef);
mergeLength(0);
}
addeValues++;
addedValues++;
bytesRefArray.append(value);
}
@@ -75,8 +75,14 @@ class BytesDVWriter {
totalSize += length;
}
public void finish(int maxDoc) {
if (addedValues < maxDoc) {
mergeLength(0);
}
}
public void flush(FieldInfo fieldInfo, SegmentWriteState state, BinaryDocValuesConsumer consumer) throws IOException {
final int bufferedDocCount = addeValues;
final int bufferedDocCount = addedValues;
BytesRef value = new BytesRef();
for(int docID=0;docID<bufferedDocCount;docID++) {
bytesRefArray.get(value, docID);
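
Taken together, the rename and the new finish(maxDoc) hook implement a simple contract: values are buffered per document, holes for skipped documents are back-filled with an empty value, and documents after the last added value are folded into the fixed-vs-variable length bookkeeping before flush. The numeric and sorted-bytes writers below receive the same finish() hook. A simplified self-contained sketch of the pattern (names invented, not the actual class):

import java.util.ArrayList;
import java.util.List;

class SketchBytesWriter {
  private static final byte[] EMPTY = new byte[0];
  private final List<byte[]> buffered = new ArrayList<>();
  // -2 means not set yet; -1 means length isn't fixed; >= 0 is the fixed length
  private int fixedLength = -2;

  void addValue(int docID, byte[] value) {
    mergeLength(value.length);
    // Fill in any holes left by documents that skipped this field:
    while (buffered.size() < docID) {
      buffered.add(EMPTY);
      mergeLength(0);
    }
    buffered.add(value);
  }

  // Called once before flush: documents after the last added value are
  // missing too, so their zero length must join the stats.
  void finish(int maxDoc) {
    if (buffered.size() < maxDoc) {
      mergeLength(0);
    }
  }

  private void mergeLength(int length) {
    if (fixedLength == -2) {
      fixedLength = length;   // first value seen
    } else if (fixedLength != length) {
      fixedLength = -1;       // lengths vary
    }
  }
}
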

View File

@@ -110,6 +110,7 @@ final class DocFieldProcessor extends DocConsumer {
}
if (field.bytesDVWriter != null) {
field.bytesDVWriter.finish(state.segmentInfo.getDocCount());
field.bytesDVWriter.flush(field.fieldInfo, state,
dvConsumer.addBinaryField(field.fieldInfo,
field.bytesDVWriter.fixedLength >= 0,
@@ -119,6 +120,7 @@ final class DocFieldProcessor extends DocConsumer {
}
if (field.sortedBytesDVWriter != null) {
field.sortedBytesDVWriter.finish(state.segmentInfo.getDocCount());
field.sortedBytesDVWriter.flush(field.fieldInfo, state,
dvConsumer.addSortedField(field.fieldInfo,
field.sortedBytesDVWriter.hash.size(),
@@ -129,6 +131,7 @@ final class DocFieldProcessor extends DocConsumer {
}
if (field.numberDVWriter != null) {
field.numberDVWriter.finish(state.segmentInfo.getDocCount());
field.numberDVWriter.flush(field.fieldInfo, state,
dvConsumer.addNumericField(field.fieldInfo,
field.numberDVWriter.minValue,

View File

@@ -80,6 +80,12 @@ class NumberDVWriter {
}
}
public void finish(int maxDoc) {
if (pending.size() < maxDoc) {
mergeValue(0);
}
}
public void flush(FieldInfo fieldInfo, SegmentWriteState state, NumericDocValuesConsumer consumer) throws IOException {
final int bufferedDocCount = pending.size();
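
The numeric writer's finish(maxDoc) is simpler: a document that never received a value defaults to 0, so it is enough to fold 0 into the min/max range, presumably so the delta encoding (value - minValue, see the SimpleText hunk above) stays non-negative for missing documents. A hypothetical condensed version of the idea:

class SketchNumberWriter {
  private long minValue = Long.MAX_VALUE;
  private long maxValue = Long.MIN_VALUE;
  private int count;

  void addValue(long v) {
    mergeValue(v);
    count++;
  }

  // Missing documents default to 0, so make sure 0 is inside [min, max]:
  void finish(int maxDoc) {
    if (count < maxDoc) {
      mergeValue(0);
    }
  }

  private void mergeValue(long v) {
    minValue = Math.min(minValue, v);
    maxValue = Math.max(maxValue, v);
  }
}
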

View File

@@ -73,6 +73,13 @@ class SortedBytesDVWriter {
addOneValue(value);
}
public void finish(int maxDoc) {
if (pendingIndex < maxDoc) {
addOneValue(EMPTY);
mergeLength(0);
}
}
private void addOneValue(BytesRef value) {
mergeLength(value.length);
int ord = hash.add(value);

View File

@@ -557,7 +557,8 @@ public interface FieldCache {
public abstract int getOrd(int docID);
/** Returns total unique ord count; this includes +1 for
* the null ord (always 0). */
* the null ord (always 0) unless the field was
* indexed with doc values. */
public abstract int numOrd();
/** Number of documents */

View File

@@ -1358,12 +1358,12 @@ public abstract class FieldComparator<T> {
// Used per-segment when bit width is not a native array
// size (8, 16, 32):
private final class AnyOrdComparator extends PerSegmentComparator {
private final class AnyDocToOrdComparator extends PerSegmentComparator {
private final PackedInts.Reader readerOrds;
private final DocTermsIndex termsIndex;
private final int docBase;
public AnyOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) {
public AnyDocToOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) {
this.readerOrds = readerOrds;
this.termsIndex = termsIndex;
this.docBase = docBase;
@@ -1403,13 +1403,57 @@ public abstract class FieldComparator<T> {
}
}
// Used per-segment when docToOrd is null:
private final class AnyOrdComparator extends PerSegmentComparator {
private final DocTermsIndex termsIndex;
private final int docBase;
public AnyOrdComparator(DocTermsIndex termsIndex, int docBase) {
this.termsIndex = termsIndex;
this.docBase = docBase;
}
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = (int) termsIndex.getOrd(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
return -1;
}
}
@Override
public void copy(int slot, int doc) {
final int ord = (int) termsIndex.getOrd(doc);
ords[slot] = ord;
if (ord == 0) {
values[slot] = null;
} else {
assert ord > 0;
if (values[slot] == null) {
values[slot] = new BytesRef();
}
termsIndex.lookup(ord, values[slot]);
}
readerGen[slot] = currentReaderGen;
}
}
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
final int docBase = context.docBase;
termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
FieldComparator<BytesRef> perSegComp = null;
if (docToOrd.hasArray()) {
if (docToOrd != null && docToOrd.hasArray()) {
final Object arr = docToOrd.getArray();
if (arr instanceof byte[]) {
perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
@@ -1423,7 +1467,11 @@ public abstract class FieldComparator<T> {
// every one having a unique value.
}
if (perSegComp == null) {
perSegComp = new AnyOrdComparator(docToOrd, termsIndex, docBase);
if (docToOrd != null) {
perSegComp = new AnyDocToOrdComparator(docToOrd, termsIndex, docBase);
} else {
perSegComp = new AnyOrdComparator(termsIndex, docBase);
}
}
currentReaderGen++;
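
The new AnyOrdComparator covers segments whose DocTermsIndex exposes no packed docToOrd array (getDocToOrd() returning null), resolving the ord per document via termsIndex.getOrd(doc) instead. Its compareBottom keeps the usual ord-comparison contract: exact within one reader, conservative across readers, where bottomOrd is only a lower bound. A standalone illustration of that contract (sketch, not from the patch):

class OrdCompareSketch {
  // Mirrors compareBottom above: exact within a reader, conservative across readers.
  static int compareBottom(boolean bottomSameReader, int bottomOrd, int docOrd) {
    if (bottomSameReader) {
      return bottomOrd - docOrd;    // ord is precisely comparable, even when equal
    }
    // bottomOrd was set to a lower bound in setBottom, so equality
    // still means bottom sorts after doc:
    return bottomOrd >= docOrd ? 1 : -1;
  }

  public static void main(String[] args) {
    System.out.println(compareBottom(true, 5, 5));   // 0: genuinely equal
    System.out.println(compareBottom(false, 5, 5));  // 1: bottom after doc
  }
}
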

View File

@@ -56,10 +56,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoNumber() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -94,10 +91,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoFloat() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -132,10 +126,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoTwoFieldsNumber() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -173,10 +164,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoTwoFieldsMixed() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -216,10 +204,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoThreeFieldsMixed() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -264,10 +249,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoThreeFieldsMixed2() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -312,10 +294,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testTwoDocumentsNumeric() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -343,10 +322,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -385,10 +361,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testBigRange() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -416,10 +389,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -458,10 +428,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -502,10 +469,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -542,10 +506,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testSortedBytesTwoDocuments() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -576,10 +537,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testSortedBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -620,10 +578,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -644,4 +599,32 @@ public class TestDemoDocValue extends LuceneTestCase {
ireader.close();
directory.close();
}
public void testMissingSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
// 2nd doc missing the DV field
iwriter.addDocument(new Document());
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.lookupOrd(dv.getOrd(0), scratch);
assertEquals(new BytesRef("hello world 2"), scratch);
dv.lookupOrd(dv.getOrd(1), scratch);
assertEquals(new BytesRef(""), scratch);
ireader.close();
directory.close();
}
}

View File

@@ -48,6 +48,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StorableField;
@@ -130,6 +131,9 @@ public class TestSort extends LuceneTestCase {
dirs.add(indexStore);
RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// nocommit remove:
((LogMergePolicy) writer.w.getConfig().getMergePolicy()).setUseCompoundFile(false);
final DocValues.Type stringDVType;
if (dvStringSorted) {
// Index sorted