fix failing tests w/ ST codec

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1410945 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-11-18 18:02:42 +00:00
parent b59bf27886
commit d963fb1664
9 changed files with 123 additions and 57 deletions

View File

@@ -174,6 +174,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
@Override
public void add(long value) throws IOException {
assert value >= minValue;
Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
SimpleTextUtil.write(data, encoder.format(delta), scratch);
SimpleTextUtil.writeNewline(data);
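
The delta here is computed with BigInteger rather than plain long subtraction because value - minValue can exceed Long.MAX_VALUE when minValue is strongly negative. A minimal standalone sketch of the failure mode (illustrative only, not part of the patch; class name invented):

import java.math.BigInteger;

public class DeltaOverflowSketch {
  public static void main(String[] args) {
    long minValue = Long.MIN_VALUE;
    long value = Long.MAX_VALUE;
    // Plain long arithmetic wraps: the true delta (2^64 - 1) cannot fit
    // in a signed 64-bit long, so this prints -1.
    System.out.println(value - minValue);
    // BigInteger carries the full-width result: 18446744073709551615.
    System.out.println(BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue)));
  }
}
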
@@ -237,7 +238,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
// nocommit
@Override
public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, final int maxLength) throws IOException {
public SortedDocValuesConsumer addSortedField(FieldInfo field, final int valueCount, boolean fixedLength, final int maxLength) throws IOException {
writeFieldEntry(field);
// write numValues
SimpleTextUtil.write(data, NUMVALUES);
@@ -274,6 +275,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
return new SortedDocValuesConsumer() {
// for asserts:
private int valuesSeen;
@Override
public void addValue(BytesRef value) throws IOException {
@@ -291,6 +295,8 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
data.writeByte((byte)' ');
}
SimpleTextUtil.writeNewline(data);
valuesSeen++;
assert valuesSeen <= valueCount;
}
@Override
@@ -356,11 +362,12 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
final Map<String,OneField> fields = new HashMap<String,OneField>();
SimpleTextDocValuesReader(FieldInfos fieldInfos, Directory dir, SegmentInfo si, IOContext context) throws IOException {
System.out.println("dir=" + dir + " seg=" + si.name);
//System.out.println("dir=" + dir + " seg=" + si.name);
data = dir.openInput(IndexFileNames.segmentFileName(si.name, "", "dat"), context);
maxDoc = si.getDocCount();
while(true) {
readLine();
//System.out.println("READ field=" + scratch.utf8ToString());
if (scratch.equals(END)) {
break;
}
@@ -571,6 +578,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
/** Used only in ctor: */
private void readLine() throws IOException {
SimpleTextUtil.readLine(data, scratch);
//System.out.println("line: " + scratch.utf8ToString());
}
/** Used only in ctor: */

View File

@@ -33,7 +33,7 @@ class BytesDVWriter {
private final BytesRefArray bytesRefArray;
private final FieldInfo fieldInfo;
private int addeValues = 0;
private int addedValues = 0;
private final BytesRef emptyBytesRef = new BytesRef();
// -2 means not set yet; -1 means length isn't fixed;
@@ -56,12 +56,12 @@ class BytesDVWriter {
mergeLength(value.length);
// Fill in any holes:
while(addeValues < docID) {
addeValues++;
while(addedValues < docID) {
addedValues++;
bytesRefArray.append(emptyBytesRef);
mergeLength(0);
}
addeValues++;
addedValues++;
bytesRefArray.append(value);
}
@@ -75,8 +75,14 @@ class BytesDVWriter {
totalSize += length;
}
public void finish(int maxDoc) {
if (addedValues < maxDoc) {
mergeLength(0);
}
}
public void flush(FieldInfo fieldInfo, SegmentWriteState state, BinaryDocValuesConsumer consumer) throws IOException {
final int bufferedDocCount = addeValues;
final int bufferedDocCount = addedValues;
BytesRef value = new BytesRef();
for(int docID=0;docID<bufferedDocCount;docID++) {
bytesRefArray.get(value, docID);
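
Taken together, the rename and the new finish(maxDoc) hook implement a simple contract: values are buffered per document, holes for skipped documents are back-filled with an empty value, and documents after the last added value are folded into the fixed-vs-variable length bookkeeping before flush. The numeric and sorted-bytes writers below receive the same finish() hook. A simplified self-contained sketch of the pattern (names invented, not the actual class):

import java.util.ArrayList;
import java.util.List;

class SketchBytesWriter {
  private static final byte[] EMPTY = new byte[0];
  private final List<byte[]> buffered = new ArrayList<>();
  // -2 means not set yet; -1 means length isn't fixed; >= 0 is the fixed length
  private int fixedLength = -2;

  void addValue(int docID, byte[] value) {
    mergeLength(value.length);
    // Fill in any holes left by documents that skipped this field:
    while (buffered.size() < docID) {
      buffered.add(EMPTY);
      mergeLength(0);
    }
    buffered.add(value);
  }

  // Called once before flush: documents after the last added value are
  // missing too, so their zero length must join the stats.
  void finish(int maxDoc) {
    if (buffered.size() < maxDoc) {
      mergeLength(0);
    }
  }

  private void mergeLength(int length) {
    if (fixedLength == -2) {
      fixedLength = length;   // first value seen
    } else if (fixedLength != length) {
      fixedLength = -1;       // lengths vary
    }
  }
}
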

View File

@@ -110,6 +110,7 @@ final class DocFieldProcessor extends DocConsumer {
}
if (field.bytesDVWriter != null) {
field.bytesDVWriter.finish(state.segmentInfo.getDocCount());
field.bytesDVWriter.flush(field.fieldInfo, state,
dvConsumer.addBinaryField(field.fieldInfo,
field.bytesDVWriter.fixedLength >= 0,
@@ -119,6 +120,7 @@ final class DocFieldProcessor extends DocConsumer {
}
if (field.sortedBytesDVWriter != null) {
field.sortedBytesDVWriter.finish(state.segmentInfo.getDocCount());
field.sortedBytesDVWriter.flush(field.fieldInfo, state,
dvConsumer.addSortedField(field.fieldInfo,
field.sortedBytesDVWriter.hash.size(),
@@ -129,6 +131,7 @@ final class DocFieldProcessor extends DocConsumer {
}
if (field.numberDVWriter != null) {
field.numberDVWriter.finish(state.segmentInfo.getDocCount());
field.numberDVWriter.flush(field.fieldInfo, state,
dvConsumer.addNumericField(field.fieldInfo,
field.numberDVWriter.minValue,

View File

@@ -80,6 +80,12 @@ class NumberDVWriter {
}
}
public void finish(int maxDoc) {
if (pending.size() < maxDoc) {
mergeValue(0);
}
}
public void flush(FieldInfo fieldInfo, SegmentWriteState state, NumericDocValuesConsumer consumer) throws IOException {
final int bufferedDocCount = pending.size();
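
The numeric writer's finish(maxDoc) is simpler: a document that never received a value defaults to 0, so it is enough to fold 0 into the min/max range, presumably so the delta encoding (value - minValue, see the SimpleText hunk above) stays non-negative for missing documents. A hypothetical condensed version of the idea:

class SketchNumberWriter {
  private long minValue = Long.MAX_VALUE;
  private long maxValue = Long.MIN_VALUE;
  private int count;

  void addValue(long v) {
    mergeValue(v);
    count++;
  }

  // Missing documents default to 0, so make sure 0 is inside [min, max]:
  void finish(int maxDoc) {
    if (count < maxDoc) {
      mergeValue(0);
    }
  }

  private void mergeValue(long v) {
    minValue = Math.min(minValue, v);
    maxValue = Math.max(maxValue, v);
  }
}
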

View File

@@ -73,6 +73,13 @@ class SortedBytesDVWriter {
addOneValue(value);
}
public void finish(int maxDoc) {
if (pendingIndex < maxDoc) {
addOneValue(EMPTY);
mergeLength(0);
}
}
private void addOneValue(BytesRef value) {
mergeLength(value.length);
int ord = hash.add(value);

View File

@@ -557,7 +557,8 @@ public interface FieldCache {
public abstract int getOrd(int docID);
/** Returns total unique ord count; this includes +1 for
* the null ord (always 0). */
* the null ord (always 0) unless the field was
* indexed with doc values. */
public abstract int numOrd();
/** Number of documents */

View File

@@ -1358,12 +1358,12 @@ public abstract class FieldComparator<T> {
// Used per-segment when bit width is not a native array
// size (8, 16, 32):
private final class AnyOrdComparator extends PerSegmentComparator {
private final class AnyDocToOrdComparator extends PerSegmentComparator {
private final PackedInts.Reader readerOrds;
private final DocTermsIndex termsIndex;
private final int docBase;
public AnyOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) {
public AnyDocToOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) {
this.readerOrds = readerOrds;
this.termsIndex = termsIndex;
this.docBase = docBase;
@@ -1403,13 +1403,57 @@ public abstract class FieldComparator<T> {
}
}
// Used per-segment when docToOrd is null:
private final class AnyOrdComparator extends PerSegmentComparator {
private final DocTermsIndex termsIndex;
private final int docBase;
public AnyOrdComparator(DocTermsIndex termsIndex, int docBase) {
this.termsIndex = termsIndex;
this.docBase = docBase;
}
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = (int) termsIndex.getOrd(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
return -1;
}
}
@Override
public void copy(int slot, int doc) {
final int ord = (int) termsIndex.getOrd(doc);
ords[slot] = ord;
if (ord == 0) {
values[slot] = null;
} else {
assert ord > 0;
if (values[slot] == null) {
values[slot] = new BytesRef();
}
termsIndex.lookup(ord, values[slot]);
}
readerGen[slot] = currentReaderGen;
}
}
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
final int docBase = context.docBase;
termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
FieldComparator<BytesRef> perSegComp = null;
if (docToOrd.hasArray()) {
if (docToOrd != null && docToOrd.hasArray()) {
final Object arr = docToOrd.getArray();
if (arr instanceof byte[]) {
perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
@@ -1423,7 +1467,11 @@ public abstract class FieldComparator<T> {
// every one having a unique value.
}
if (perSegComp == null) {
perSegComp = new AnyOrdComparator(docToOrd, termsIndex, docBase);
if (docToOrd != null) {
perSegComp = new AnyDocToOrdComparator(docToOrd, termsIndex, docBase);
} else {
perSegComp = new AnyOrdComparator(termsIndex, docBase);
}
}
currentReaderGen++;
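
The new AnyOrdComparator covers segments whose DocTermsIndex exposes no packed docToOrd array (getDocToOrd() returning null), resolving the ord per document via termsIndex.getOrd(doc) instead. Its compareBottom keeps the usual ord-comparison contract: exact within one reader, conservative across readers, where bottomOrd is only a lower bound. A standalone illustration of that contract (sketch, not from the patch):

class OrdCompareSketch {
  // Mirrors compareBottom above: exact within a reader, conservative across readers.
  static int compareBottom(boolean bottomSameReader, int bottomOrd, int docOrd) {
    if (bottomSameReader) {
      return bottomOrd - docOrd;    // ord is precisely comparable, even when equal
    }
    // bottomOrd was set to a lower bound in setBottom, so equality
    // still means bottom sorts after doc:
    return bottomOrd >= docOrd ? 1 : -1;
  }

  public static void main(String[] args) {
    System.out.println(compareBottom(true, 5, 5));   // 0: genuinely equal
    System.out.println(compareBottom(false, 5, 5));  // 1: bottom after doc
  }
}
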

View File

@@ -56,10 +56,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoNumber() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -94,10 +91,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoFloat() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -132,10 +126,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoTwoFieldsNumber() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -173,10 +164,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoTwoFieldsMixed() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -216,10 +204,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoThreeFieldsMixed() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -264,10 +249,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoThreeFieldsMixed2() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -312,10 +294,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testTwoDocumentsNumeric() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -343,10 +322,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -385,10 +361,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testBigRange() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -416,10 +389,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -458,10 +428,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -502,10 +469,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testDemoSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
@@ -542,10 +506,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testSortedBytesTwoDocuments() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -576,10 +537,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testSortedBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -620,10 +578,7 @@ public class TestDemoDocValue extends LuceneTestCase {
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
@@ -644,4 +599,32 @@ public class TestDemoDocValue extends LuceneTestCase {
ireader.close();
directory.close();
}
public void testMissingSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
// 2nd doc missing the DV field
iwriter.addDocument(new Document());
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.lookupOrd(dv.getOrd(0), scratch);
assertEquals(new BytesRef("hello world 2"), scratch);
dv.lookupOrd(dv.getOrd(1), scratch);
assertEquals(new BytesRef(""), scratch);
ireader.close();
directory.close();
}
}

View File

@@ -48,6 +48,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StorableField;
@@ -130,6 +131,9 @@ public class TestSort extends LuceneTestCase {
dirs.add(indexStore);
RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// nocommit remove:
((LogMergePolicy) writer.w.getConfig().getMergePolicy()).setUseCompoundFile(false);
final DocValues.Type stringDVType;
if (dvStringSorted) {
// Index sorted