diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java index 72960ae601a..053861a25d5 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java @@ -174,6 +174,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat { @Override public void add(long value) throws IOException { + assert value >= minValue; Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue)); SimpleTextUtil.write(data, encoder.format(delta), scratch); SimpleTextUtil.writeNewline(data); @@ -237,7 +238,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat { // nocommit @Override - public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, final int maxLength) throws IOException { + public SortedDocValuesConsumer addSortedField(FieldInfo field, final int valueCount, boolean fixedLength, final int maxLength) throws IOException { writeFieldEntry(field); // write numValues SimpleTextUtil.write(data, NUMVALUES); @@ -274,6 +275,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat { final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT)); return new SortedDocValuesConsumer() { + + // for asserts: + private int valuesSeen; @Override public void addValue(BytesRef value) throws IOException { @@ -291,6 +295,8 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat { data.writeByte((byte)' '); } SimpleTextUtil.writeNewline(data); + valuesSeen++; + assert valuesSeen <= valueCount; } @Override @@ -356,11 +362,12 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat { final Map fields = new HashMap(); SimpleTextDocValuesReader(FieldInfos fieldInfos, Directory dir, SegmentInfo si, IOContext context) throws IOException { - System.out.println("dir=" + dir + " seg=" + si.name); + //System.out.println("dir=" + dir + " seg=" + si.name); data = dir.openInput(IndexFileNames.segmentFileName(si.name, "", "dat"), context); maxDoc = si.getDocCount(); while(true) { readLine(); + //System.out.println("READ field=" + scratch.utf8ToString()); if (scratch.equals(END)) { break; } @@ -571,6 +578,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat { /** Used only in ctor: */ private void readLine() throws IOException { SimpleTextUtil.readLine(data, scratch); + //System.out.println("line: " + scratch.utf8ToString()); } /** Used only in ctor: */ diff --git a/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java b/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java index 9ab18f55060..72f9853f4bc 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java @@ -33,7 +33,7 @@ class BytesDVWriter { private final BytesRefArray bytesRefArray; private final FieldInfo fieldInfo; - private int addeValues = 0; + private int addedValues = 0; private final BytesRef emptyBytesRef = new BytesRef(); // -2 means not set yet; -1 means length isn't fixed; @@ -56,12 +56,12 @@ class BytesDVWriter { mergeLength(value.length); // Fill in any holes: - while(addeValues < docID) { - addeValues++; + while(addedValues < docID) { + addedValues++; bytesRefArray.append(emptyBytesRef); mergeLength(0); } - addeValues++; + addedValues++; bytesRefArray.append(value); } @@ -75,8 +75,14 @@ class BytesDVWriter { totalSize += length; } + public void finish(int maxDoc) { + if (addedValues < maxDoc) { + mergeLength(0); + } + } + public void flush(FieldInfo fieldInfo, SegmentWriteState state, BinaryDocValuesConsumer consumer) throws IOException { - final int bufferedDocCount = addeValues; + final int bufferedDocCount = addedValues; BytesRef value = new BytesRef(); for(int docID=0;docID= 0, @@ -119,6 +120,7 @@ final class DocFieldProcessor extends DocConsumer { } if (field.sortedBytesDVWriter != null) { + field.sortedBytesDVWriter.finish(state.segmentInfo.getDocCount()); field.sortedBytesDVWriter.flush(field.fieldInfo, state, dvConsumer.addSortedField(field.fieldInfo, field.sortedBytesDVWriter.hash.size(), @@ -129,6 +131,7 @@ final class DocFieldProcessor extends DocConsumer { } if (field.numberDVWriter != null) { + field.numberDVWriter.finish(state.segmentInfo.getDocCount()); field.numberDVWriter.flush(field.fieldInfo, state, dvConsumer.addNumericField(field.fieldInfo, field.numberDVWriter.minValue, diff --git a/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java index dfe8a16efef..2a83458dc86 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java @@ -80,6 +80,12 @@ class NumberDVWriter { } } + public void finish(int maxDoc) { + if (pending.size() < maxDoc) { + mergeValue(0); + } + } + public void flush(FieldInfo fieldInfo, SegmentWriteState state, NumericDocValuesConsumer consumer) throws IOException { final int bufferedDocCount = pending.size(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java index 7cc5c3750bf..55e0c43f6e0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java @@ -73,6 +73,13 @@ class SortedBytesDVWriter { addOneValue(value); } + public void finish(int maxDoc) { + if (pendingIndex < maxDoc) { + addOneValue(EMPTY); + mergeLength(0); + } + } + private void addOneValue(BytesRef value) { mergeLength(value.length); int ord = hash.add(value); diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index 23684819d32..6b5d45ad282 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -557,7 +557,8 @@ public interface FieldCache { public abstract int getOrd(int docID); /** Returns total unique ord count; this includes +1 for - * the null ord (always 0). */ + * the null ord (always 0) unless the field was + * indexed with doc values. */ public abstract int numOrd(); /** Number of documents */ diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index 30d1b011a0c..8145501580f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -1358,12 +1358,12 @@ public abstract class FieldComparator { // Used per-segment when bit width is not a native array // size (8, 16, 32): - private final class AnyOrdComparator extends PerSegmentComparator { + private final class AnyDocToOrdComparator extends PerSegmentComparator { private final PackedInts.Reader readerOrds; private final DocTermsIndex termsIndex; private final int docBase; - public AnyOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) { + public AnyDocToOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) { this.readerOrds = readerOrds; this.termsIndex = termsIndex; this.docBase = docBase; @@ -1403,13 +1403,57 @@ public abstract class FieldComparator { } } + // Used per-segment when docToOrd is null: + private final class AnyOrdComparator extends PerSegmentComparator { + private final DocTermsIndex termsIndex; + private final int docBase; + + public AnyOrdComparator(DocTermsIndex termsIndex, int docBase) { + this.termsIndex = termsIndex; + this.docBase = docBase; + } + + @Override + public int compareBottom(int doc) { + assert bottomSlot != -1; + final int docOrd = (int) termsIndex.getOrd(doc); + if (bottomSameReader) { + // ord is precisely comparable, even in the equal case + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; + } else { + return -1; + } + } + + @Override + public void copy(int slot, int doc) { + final int ord = (int) termsIndex.getOrd(doc); + ords[slot] = ord; + if (ord == 0) { + values[slot] = null; + } else { + assert ord > 0; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.lookup(ord, values[slot]); + } + readerGen[slot] = currentReaderGen; + } + } + @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { final int docBase = context.docBase; termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); final PackedInts.Reader docToOrd = termsIndex.getDocToOrd(); FieldComparator perSegComp = null; - if (docToOrd.hasArray()) { + if (docToOrd != null && docToOrd.hasArray()) { final Object arr = docToOrd.getArray(); if (arr instanceof byte[]) { perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase); @@ -1423,7 +1467,11 @@ public abstract class FieldComparator { // every one having a unique value. } if (perSegComp == null) { - perSegComp = new AnyOrdComparator(docToOrd, termsIndex, docBase); + if (docToOrd != null) { + perSegComp = new AnyDocToOrdComparator(docToOrd, termsIndex, docBase); + } else { + perSegComp = new AnyOrdComparator(termsIndex, docBase); + } } currentReaderGen++; diff --git a/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java b/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java index c881105b658..2347e073b6f 100644 --- a/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java +++ b/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java @@ -56,10 +56,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoNumber() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -94,10 +91,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoFloat() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -132,10 +126,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoTwoFieldsNumber() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -173,10 +164,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoTwoFieldsMixed() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -216,10 +204,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoThreeFieldsMixed() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -264,10 +249,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoThreeFieldsMixed2() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -312,10 +294,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testTwoDocumentsNumeric() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -343,10 +322,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testTwoDocumentsMerged() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -385,10 +361,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testBigRange() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -416,10 +389,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -458,10 +428,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testBytesTwoDocumentsMerged() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -502,10 +469,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testDemoSortedBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -542,10 +506,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testSortedBytesTwoDocuments() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -576,10 +537,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testSortedBytesTwoDocumentsMerged() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -620,10 +578,7 @@ public class TestDemoDocValue extends LuceneTestCase { public void testBytesWithNewline() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); - // Store the index in memory: Directory directory = newDirectory(); - // To store an index on disk, use this instead: - // Directory directory = FSDirectory.open(new File("/tmp/testindex")); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.setMergePolicy(newLogMergePolicy()); @@ -644,4 +599,32 @@ public class TestDemoDocValue extends LuceneTestCase { ireader.close(); directory.close(); } + + public void testMissingSortedBytes() throws IOException { + Analyzer analyzer = new MockAnalyzer(random()); + + Directory directory = newDirectory(); + // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + iwc.setMergePolicy(newLogMergePolicy()); + IndexWriter iwriter = new IndexWriter(directory, iwc); + Document doc = new Document(); + doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 2"))); + iwriter.addDocument(doc); + // 2nd doc missing the DV field + iwriter.addDocument(new Document()); + iwriter.close(); + + // Now search the index: + IndexReader ireader = DirectoryReader.open(directory); // read-only=true + assert ireader.leaves().size() == 1; + SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); + BytesRef scratch = new BytesRef(); + dv.lookupOrd(dv.getOrd(0), scratch); + assertEquals(new BytesRef("hello world 2"), scratch); + dv.lookupOrd(dv.getOrd(1), scratch); + assertEquals(new BytesRef(""), scratch); + ireader.close(); + directory.close(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSort.java b/lucene/core/src/test/org/apache/lucene/search/TestSort.java index d81b89053e8..275ad1b9363 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSort.java @@ -48,6 +48,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.StorableField; @@ -130,6 +131,9 @@ public class TestSort extends LuceneTestCase { dirs.add(indexStore); RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + // nocommit remove: + ((LogMergePolicy) writer.w.getConfig().getMergePolicy()).setUseCompoundFile(false); + final DocValues.Type stringDVType; if (dvStringSorted) { // Index sorted