LUCENE-3829: improve test coverage of DocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294286 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-02-27 19:20:08 +00:00
parent dc0c24053e
commit 29ab4d99d6
13 changed files with 199 additions and 48 deletions

View File

@ -23,16 +23,17 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.SortedSource;
@ -817,6 +818,103 @@ public class TestDocValuesIndexing extends LuceneTestCase {
int nextDoc = termDocsEnum.nextDoc();
assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
return nextDoc;
}
/** LUCENE-3829: indexes random BYTES_VAR_SORTED doc values, then hammers
 *  the SortedSource concurrently from several threads, verifying each
 *  doc's stored value round-trips. */
public void testWithThreads() throws Exception {
  final int NUM_DOCS = atLeast(100);
  final Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  final boolean allowDups = random.nextBoolean();
  final Set<String> seen = new HashSet<String>();
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
  }
  int numDocs = 0;
  final List<BytesRef> docValues = new ArrayList<BytesRef>();
  // TODO: deletions
  while (numDocs < NUM_DOCS) {
    final String s;
    if (random.nextBoolean()) {
      s = _TestUtil.randomSimpleString(random);
    } else {
      s = _TestUtil.randomUnicodeString(random);
    }
    final BytesRef br = new BytesRef(s);
    if (!allowDups) {
      if (seen.contains(s)) {
        continue;
      }
      seen.add(s);
    }
    if (VERBOSE) {
      System.out.println(" " + numDocs + ": s=" + s);
    }
    final Document doc = new Document();
    doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED));
    // "id" DV maps docID -> insertion order so we can look up the
    // expected value even after merging reorders docs:
    doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS));
    docValues.add(br);
    writer.addDocument(doc);
    numDocs++;
    if (random.nextInt(40) == 17) {
      // force flush
      writer.getReader().close();
    }
  }
  writer.forceMerge(1);
  final DirectoryReader r = writer.getReader();
  writer.close();
  final AtomicReader sr = getOnlySegmentReader(r);
  final DocValues dv = sr.docValues("stringdv");
  // BUG FIX: assert dv != null BEFORE dereferencing it; previously the
  // assertNotNull came after dv.getSource() had already been called.
  assertNotNull(dv);
  final DocValues.Source stringDVSource = dv.getSource();
  assertNotNull(stringDVSource);
  final DocValues.Source stringDVDirectSource = dv.getDirectSource();
  assertNotNull(stringDVDirectSource);
  final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1);
  final DocValues.Source docIDToID = sr.docValues("id").getSource();
  final int NUM_THREADS = _TestUtil.nextInt(random, 1, 10);
  Thread[] threads = new Thread[NUM_THREADS];
  for(int thread=0;thread<NUM_THREADS;thread++) {
    threads[thread] = new Thread() {
        @Override
        public void run() {
          while(System.currentTimeMillis() < END_TIME) {
            final DocValues.Source source;
            // LUCENE-3829: remove this 'true ||' below
            // once we fix thread safety of DirectSource
            if (true || random.nextBoolean()) {
              source = stringDVSource;
            } else {
              source = stringDVDirectSource;
            }
            final DocValues.SortedSource sortedSource = source.asSortedSource();
            assertNotNull(sortedSource);
            final BytesRef scratch = new BytesRef();
            for(int iter=0;iter<100;iter++) {
              final int docID = random.nextInt(sr.maxDoc());
              final BytesRef br = sortedSource.getBytes(docID, scratch);
              assertEquals(docValues.get((int) docIDToID.getInt(docID)), br);
            }
          }
        }
      };
    threads[thread].start();
  }
  // BUG FIX: wait for all searcher threads to finish before closing the
  // reader/dir they are using; otherwise close() races with getBytes().
  for(Thread thread : threads) {
    thread.join();
  }
  r.close();
  dir.close();
}
}

View File

@ -39,7 +39,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
lineDocFile = new LineFileDocs(random);
lineDocFile = new LineFileDocs(random, defaultCodecSupportsDocValues());
}
@AfterClass
@ -323,6 +323,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
}
writer.commit();
} catch (Throwable ex) {
System.out.println("FAILED exc:");
ex.printStackTrace(System.out);
throw new RuntimeException(ex);
}
}

View File

@ -59,7 +59,7 @@ public class TestForceMergeForever extends LuceneTestCase {
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 11));
final int numStartDocs = atLeast(20);
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
for(int docIDX=0;docIDX<numStartDocs;docIDX++) {
w.addDocument(docs.nextDoc());
}

View File

@ -29,7 +29,6 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
@ -179,7 +178,7 @@ public class TestNorms extends LuceneTestCase {
Similarity provider = new MySimProvider(writeNorms);
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
int num = atLeast(100);
for (int i = 0; i < num; i++) {
Document doc = docs.nextDoc();

View File

@ -34,7 +34,7 @@ public class TestRollingUpdates extends LuceneTestCase {
public void testRollingUpdates() throws Exception {
final MockDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false); // we use a custom codec provider
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
//provider.register(new MemoryCodec());
if ( (!"Lucene3x".equals(Codec.getDefault().getName())) && random.nextBoolean()) {

View File

@ -51,7 +51,7 @@ import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
public class TestTermsEnum extends LuceneTestCase {
public void test() throws Exception {
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final Directory d = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random, d);
final int numDocs = atLeast(10);

View File

@ -48,7 +48,8 @@ public class TestNRTCachingDirectory extends LuceneTestCase {
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random,
defaultCodecSupportsDocValues());
final int numDocs = _TestUtil.nextInt(random, 100, 400);
if (VERBOSE) {

View File

@ -1094,7 +1094,7 @@ public class TestFSTs extends LuceneTestCase {
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
}
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");

View File

@ -356,37 +356,29 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
shift = 0;
trigger = 1;
} else {
trigger = totTermCount.get()/10;
trigger = totTermCount.get()/30;
shift = random.nextInt(trigger);
}
BytesRef term = termsEnum.next();
if (term == null) {
if (seenTermCount == 0) {
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
if (seenTermCount == 0) {
break;
}
totTermCount.set(seenTermCount);
break;
}
totTermCount.set(seenTermCount);
seenTermCount = 0;
if (totTermCount.get() < 10) {
shift = 0;
seenTermCount++;
// search 30 terms
if (trigger == 0) {
trigger = 1;
} else {
trigger = totTermCount.get()/10;
//System.out.println("trigger " + trigger);
shift = random.nextInt(trigger);
}
termsEnum.seekCeil(new BytesRef(""));
continue;
}
seenTermCount++;
// search 10 terms
if (trigger == 0) {
trigger = 1;
}
if ((seenTermCount + shift) % trigger == 0) {
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
//}
totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
if ((seenTermCount + shift) % trigger == 0) {
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
//}
totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
}
}
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + ": search done");
@ -432,7 +424,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
final long t0 = System.currentTimeMillis();
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final File tempDir = _TestUtil.getTempDir(testName);
dir = newFSDirectory(tempDir);
((MockDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
@ -636,7 +628,14 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
private int runQuery(IndexSearcher s, Query q) throws Exception {
s.search(q, 10);
return s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
int hitCount = s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
if (defaultCodecSupportsDocValues()) {
final Sort dvSort = new Sort(new SortField("title", SortField.Type.STRING));
dvSort.getSort()[0].setUseIndexValues(true);
int hitCount2 = s.search(q, null, 10, dvSort).totalHits;
assertEquals(hitCount, hitCount2);
}
return hitCount;
}
protected void smokeTestSearcher(IndexSearcher s) throws Exception {

View File

@ -518,7 +518,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
@Override
public void run() {
try {
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
int numDocs = 0;
while (System.nanoTime() < endTimeNanos) {
final int what = random.nextInt(3);

View File

@ -17,22 +17,24 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
import java.util.Random;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues;
/** Minimal port of contrib/benchmark's LineDocSource +
* DocMaker, so tests can enum docs from a line file created
@ -43,16 +45,22 @@ public class LineFileDocs implements Closeable {
private final static int BUFFER_SIZE = 1 << 16; // 64K
private final AtomicInteger id = new AtomicInteger();
private final String path;
private final boolean useDocValues;
/** If forever is true, we rewind the file at EOF (repeat
* the docs over and over) */
public LineFileDocs(Random random, String path) throws IOException {
public LineFileDocs(Random random, String path, boolean useDocValues) throws IOException {
this.path = path;
this.useDocValues = useDocValues;
open(random);
}
public LineFileDocs(Random random) throws IOException {
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, true);
}
public LineFileDocs(Random random, boolean useDocValues) throws IOException {
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, useDocValues);
}
public synchronized void close() throws IOException {
@ -113,11 +121,12 @@ public class LineFileDocs implements Closeable {
final Document doc;
final Field titleTokenized;
final Field title;
final Field titleDV;
final Field body;
final Field id;
final Field date;
public DocState() {
public DocState(boolean useDocValues) {
doc = new Document();
title = new StringField("title", "");
@ -139,6 +148,13 @@ public class LineFileDocs implements Closeable {
date = new Field("date", "", StringField.TYPE_STORED);
doc.add(date);
if (useDocValues) {
titleDV = new DocValuesField("titleDV", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
doc.add(titleDV);
} else {
titleDV = null;
}
}
}
@ -162,7 +178,7 @@ public class LineFileDocs implements Closeable {
DocState docState = threadDocs.get();
if (docState == null) {
docState = new DocState();
docState = new DocState(useDocValues);
threadDocs.set(docState);
}
@ -178,6 +194,9 @@ public class LineFileDocs implements Closeable {
docState.body.setStringValue(line.substring(1+spot2, line.length()));
final String title = line.substring(0, spot);
docState.title.setStringValue(title);
if (docState.titleDV != null) {
docState.titleDV.setBytesValue(new BytesRef(title));
}
docState.titleTokenized.setStringValue(title);
docState.date.setStringValue(line.substring(1+spot, spot2));
docState.id.setStringValue(Integer.toString(id.getAndIncrement()));

View File

@ -1573,4 +1573,8 @@ public abstract class LuceneTestCase extends Assert {
@Ignore("just a hack")
public final void alwaysIgnoredTestMethod() {}
/** Returns true unless the default codec is the read-only legacy
 *  "Lucene3x" (preflex) codec, which cannot write DocValues. */
protected static boolean defaultCodecSupportsDocValues() {
  // Constant-first equals is null-safe (matches the idiom used elsewhere
  // in this commit, e.g. TestRollingUpdates), avoiding an NPE if the
  // codec name were ever null.
  return !"Lucene3x".equals(Codec.getDefault().getName());
}
}

View File

@ -38,10 +38,12 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfos;
@ -677,9 +679,36 @@ public class _TestUtil {
public static Document cloneDocument(Document doc1) {
final Document doc2 = new Document();
for(IndexableField f : doc1) {
Field field1 = (Field) f;
Field field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
final Field field1 = (Field) f;
final Field field2;
if (field1 instanceof DocValuesField) {
final DocValues.Type dvType = field1.fieldType().docValueType();
switch (dvType) {
case VAR_INTS:
case FIXED_INTS_8:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
field2 = new DocValuesField(field1.name(), field1.numericValue().intValue(), dvType);
break;
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
field2 = new DocValuesField(field1.name(), BytesRef.deepCopyOf(field1.binaryValue()), dvType);
break;
case FLOAT_32:
case FLOAT_64:
field2 = new DocValuesField(field1.name(), field1.numericValue().doubleValue(), dvType);
break;
default:
throw new IllegalArgumentException("don't know how to clone DV field=" + field1);
}
} else {
field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
}
doc2.add(field2);
}