mirror of https://github.com/apache/lucene.git
LUCENE-3269: speed up top-k sampling tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1143122 13f79535-47bb-0310-9956-ffa450edef68
parent 06a3778905
commit 6e25bef3ef
@@ -21,6 +21,7 @@ import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
@@ -138,7 +139,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
taxoDir = newDirectory();
}

RandomIndexWriter iw = new RandomIndexWriter(random, indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, getAnalyzer()));
RandomIndexWriter iw = new RandomIndexWriter(random, indexDir, getIndexWriterConfig(getAnalyzer()));
TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);

populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
@@ -154,6 +155,11 @@ public abstract class FacetTestBase extends LuceneTestCase {
indexReader = IndexReader.open(indexDir);
searcher = newSearcher(indexReader);
}

/** Returns indexing params for the main index */
protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
}

/** Returns a default facet indexing params */
protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
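The hunks above replace the hard-wired newIndexWriterConfig(TEST_VERSION_CURRENT, getAnalyzer()) call in initIndex with an overridable getIndexWriterConfig(Analyzer) hook, so a subclass can tune the writer that builds the shared test index without copying the indexing code. A minimal sketch of the idea; the subclass name and the RAM-buffer tweak are illustrative only, not part of the commit:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.facet.FacetTestBase;
    import org.apache.lucene.index.IndexWriterConfig;

    // Hypothetical subclass: initIndex() now picks up whatever config this hook returns.
    public abstract class RamFlushFacetTest extends FacetTestBase {
      @Override
      protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
        // start from the base config and flush by RAM usage instead of the defaults
        return super.getIndexWriterConfig(analyzer).setRAMBufferSizeMB(1.0);
      }
    }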
@@ -6,8 +6,11 @@ import java.util.List;

import org.apache.lucene.DocumentBuilder.DocumentBuilderException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.util._TestUtil;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.facet.FacetTestBase;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
@@ -105,4 +108,9 @@ public abstract class BaseTestTopK extends FacetTestBase {
protected int numDocsToIndex() {
return 20000;
}

@Override
protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
return super.getIndexWriterConfig(analyzer).setMaxBufferedDocs(_TestUtil.nextInt(random, 500, 10000));
}
}
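BaseTestTopK plugs into that hook: each run now indexes its 20000 documents (numDocsToIndex) with a maxBufferedDocs value drawn from [500, 10000], so the segment structure of the test index varies from run to run instead of always flushing at the same points. Illustrative arithmetic only, using the numbers from the hunks above:

    // Sketch, ignoring RandomIndexWriter's own random flushes and merges:
    int maxBuffered = _TestUtil.nextInt(random, 500, 10000); // both bounds inclusive
    int flushes = (int) Math.ceil(20000.0 / maxBuffered);    // roughly 2 .. 40 flushes per 20000-doc run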
@@ -32,41 +32,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
*/

public class TestTopKResultsHandlerRandom extends BaseTestTopK {

/**
* Try out faceted search in it's most basic form (no sampling nor complement
* that is). In this test lots (and lots..) of randomly generated data is
* being indexed, and later on an "over-all" faceted search is performed. The
* results are checked against the DF of each facet by itself
*/
@Test
public void testCountsComplementDisabled() throws Exception {
doTestCounts(false);
}

private void doTestCounts(boolean doComplement) throws Exception,
IOException, IllegalAccessException, InstantiationException {
for (int partitionSize : partitionSizes) {
initIndex(partitionSize);

List<FacetResult> facetResults = countFacets(partitionSize, 100000, doComplement);
assertCountsAndCardinality(facetCountsTruth(), facetResults);

closeAll();
}
}

/**
* Try out faceted search with complements. In this test lots (and lots..) of
* randomly generated data is being indexed, and later on, a "beta" faceted
* search is performed - retrieving ~90% of the documents so complements takes
* place in here. The results are checked against the a regular (a.k.a
* no-complement, no-sampling) faceted search with the same parameters.
*/
@Test
public void testCountsComplementEnforced() throws Exception {
doTestCounts(true);
}

private List<FacetResult> countFacets(int partitionSize, int numResults, final boolean doComplement)
throws IOException, IllegalAccessException, InstantiationException {
@@ -97,6 +62,25 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
for (int partitionSize : partitionSizes) {
initIndex(partitionSize);

/*
* Try out faceted search in it's most basic form (no sampling nor complement
* that is). In this test lots (and lots..) of randomly generated data is
* being indexed, and later on an "over-all" faceted search is performed. The
* results are checked against the DF of each facet by itself
*/
List<FacetResult> facetResults = countFacets(partitionSize, 100000, false);
assertCountsAndCardinality(facetCountsTruth(), facetResults);

/*
* Try out faceted search with complements. In this test lots (and lots..) of
* randomly generated data is being indexed, and later on, a "beta" faceted
* search is performed - retrieving ~90% of the documents so complements takes
* place in here. The results are checked against the a regular (a.k.a
* no-complement, no-sampling) faceted search with the same parameters.
*/
facetResults = countFacets(partitionSize, 100000, true);
assertCountsAndCardinality(facetCountsTruth(), facetResults);

List<FacetResult> allFacetResults = countFacets(partitionSize, 100000, false);

HashMap<String,Integer> all = new HashMap<String,Integer>();
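The speed-up in TestTopKResultsHandlerRandom comes from folding the two @Test methods into a single pass: instead of doTestCounts(false) and doTestCounts(true) each looping over every partition size and rebuilding the index, the surviving test (second hunk) builds the index once per partition size and runs both the plain and the complement counts against the same index. Roughly, based on the added lines (the enclosing method and the checks that follow it are outside the hunk and assumed here):

    for (int partitionSize : partitionSizes) {
      initIndex(partitionSize);                       // one index build per partition size

      // complement disabled
      List<FacetResult> facetResults = countFacets(partitionSize, 100000, false);
      assertCountsAndCardinality(facetCountsTruth(), facetResults);

      // complement enforced, against the very same index
      facetResults = countFacets(partitionSize, 100000, true);
      assertCountsAndCardinality(facetCountsTruth(), facetResults);

      // ... the method's existing checks (allFacetResults and friends) continue here ...
    }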
@@ -7,7 +7,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.junit.Test;

import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.facet.search.BaseTestTopK;
@@ -48,22 +47,12 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams);

@Test
public void testCountUsingComplementSampling() throws Exception {
doTestWithSamping(true);
}

@Test
public void testCountUsingSampling() throws Exception {
doTestWithSamping(false);
}

/**
* Try out faceted search with sampling enabled and complements either disabled or enforced
* Lots of randomly generated data is being indexed, and later on a "90% docs" faceted search
* is performed. The results are compared to non-sampled ones.
*/
private void doTestWithSamping(boolean complement) throws Exception, IOException {
public void testCountUsingSamping() throws Exception, IOException {
for (int partitionSize : partitionSizes) {
initIndex(partitionSize);

@@ -84,24 +73,30 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {

FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize);

// try several times in case of failure, because the test has a chance to fail
// if the top K facets are not sufficiently common with the sample set
for (int n=RETRIES; n>0; n--) {
FacetsCollector samplingFC = samplingCollector(complement, sampler, samplingSearchParams);

searcher.search(q, samplingFC);
List<FacetResult> sampledResults = samplingFC.getFacetResults();

try {
assertSameResults(expectedResults, sampledResults);
break; // succeeded
} catch (Exception e) {
if (n<=1) { // otherwise try again
throw e;
}
assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
assertSampling(expectedResults, q, sampler, samplingSearchParams, true);

closeAll();
}
}

private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler, FacetSearchParams params, boolean complement) throws Exception {
// try several times in case of failure, because the test has a chance to fail
// if the top K facets are not sufficiently common with the sample set
for (int n=RETRIES; n>0; n--) {
FacetsCollector samplingFC = samplingCollector(false, sampler, params);

searcher.search(q, samplingFC);
List<FacetResult> sampledResults = samplingFC.getFacetResults();

try {
assertSameResults(expected, sampledResults);
break; // succeeded
} catch (Exception e) {
if (n<=1) { // otherwise try again
throw e;
}
}
closeAll();
}
}
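BaseSampleTestTopK gets the same treatment: testCountUsingComplementSampling and testCountUsingSampling are collapsed into one testCountUsingSamping that indexes once per partition size and exercises both modes through a new assertSampling helper. Because sampling is probabilistic, the helper keeps the old retry idiom: it re-runs the sampled search up to RETRIES times and only rethrows the assertion failure on the final attempt. Pieced together from the last hunk, the helper reads roughly like this (indentation and the surrounding class context are assumptions):

    private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler,
        FacetSearchParams params, boolean complement) throws Exception {
      // try several times in case of failure, because the test has a chance to fail
      // if the top K facets are not sufficiently common with the sample set
      for (int n = RETRIES; n > 0; n--) {
        FacetsCollector samplingFC = samplingCollector(false, sampler, params); // the hunk shows a literal false here, not the complement parameter

        searcher.search(q, samplingFC);
        List<FacetResult> sampledResults = samplingFC.getFacetResults();

        try {
          assertSameResults(expected, sampledResults);
          break; // succeeded
        } catch (Exception e) {
          if (n <= 1) { // on the last retry, give up and surface the failure
            throw e;
          }
        }
      }
    }

The caller then shrinks to two lines per partition size, assertSampling(..., false) and assertSampling(..., true), followed by closeAll().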
@@ -36,7 +36,7 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {

CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3);

final int n = 100 * 1000;
final int n = atLeast(10 * 1000);
final int numUniqueValues = 50 * 1000;

String[] uniqueValues = new String[numUniqueValues];
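Finally, TestCompactLabelToOrdinal stops hard-coding 100,000 iterations. atLeast(10 * 1000) is LuceneTestCase's helper that returns some value of at least the given bound and grows it when the test multiplier / nightly mode is raised, so a default run does roughly a tenth of the old work while stress runs can still exceed it. A sketch of the pattern (only the first line comes from the hunk; the loop body is hypothetical):

    final int n = atLeast(10 * 1000); // >= 10,000 by default, scaled up under the test multiplier / nightly runs
    for (int i = 0; i < n; i++) {
      // feed the i-th randomly chosen label to CompactLabelToOrdinal and check its ordinal ...
    }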