LUCENE-5666: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5666@1594507 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-05-14 07:46:44 +00:00
commit c85da1a6a9
13 changed files with 148 additions and 42 deletions

lucene/CHANGES.txt

@@ -198,6 +198,8 @@ Bug fixes
* LUCENE-5660: AnalyzingSuggester.build will now throw IllegalArgumentException if
  you give it a longer suggestion than it can handle (Robert Muir, Mike McCandless)
+* LUCENE-5668: Fix off-by-one in TieredMergePolicy (Mike McCandless)
Test Framework
* LUCENE-5622: Fail tests if they print over the given limit of bytes to

TieredMergePolicy.java

@@ -361,7 +361,7 @@ public class TieredMergePolicy extends MergePolicy {
return spec;
}
-if (eligible.size() >= allowedSegCountInt) {
+if (eligible.size() > allowedSegCountInt) {
// OK we are over budget -- find best merge!
MergeScore bestScore = null;
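The one-character change above is the entire LUCENE-5668 fix, and it is easy to misread. A minimal sketch of the reasoning, as a hypothetical standalone helper rather than TieredMergePolicy's actual code:

static boolean isOverBudget(int eligibleSegments, int allowedSegCount) {
  // An index sitting exactly at its budget is within budget. With the old
  // ">=" comparison, such an index was treated as over budget, so the
  // policy kept selecting merges it did not need.
  return eligibleSegments > allowedSegCount; // previously: >=
}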

RamUsageEstimator.java

@@ -420,6 +420,7 @@ public final class RamUsageEstimator {
seen.add(ob);
final Class<?> obClazz = ob.getClass();
+assert obClazz != null : "jvm bug detected (Object.getClass() == null). please report this to your vendor";
if (obClazz.isArray()) {
/*
* Consider an array, possibly of primitive types. Push any of its references to
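For orientation, this hunk sits inside RamUsageEstimator's object-graph walk. A condensed sketch of that general technique, with an identity-based seen set so cycles and shared references are counted once; this is an illustration under simplified assumptions (plain reflection, no size accounting), not Lucene's implementation:

import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayDeque;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

final class GraphWalkSketch {
  // Counts each reachable object once, surviving cycles and shared refs.
  static long countReachable(Object root) {
    Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<>());
    ArrayDeque<Object> stack = new ArrayDeque<>();
    if (root != null) stack.push(root);
    long count = 0;
    while (!stack.isEmpty()) {
      Object ob = stack.pop();
      if (!seen.add(ob)) continue;      // identity, not equals(): cycles are safe
      count++;
      Class<?> obClazz = ob.getClass(); // never null, as the assert above insists
      if (obClazz.isArray()) {
        if (!obClazz.getComponentType().isPrimitive()) {
          for (int i = Array.getLength(ob) - 1; i >= 0; i--) {
            Object v = Array.get(ob, i);
            if (v != null) stack.push(v); // push the array's element references
          }
        }
      } else {
        for (Class<?> c = obClazz; c != null; c = c.getSuperclass()) {
          for (Field f : c.getDeclaredFields()) {
            if (Modifier.isStatic(f.getModifiers()) || f.getType().isPrimitive()) continue;
            try {
              f.setAccessible(true);    // may be refused under a security manager
              Object v = f.get(ob);
              if (v != null) stack.push(v);
            } catch (RuntimeException | IllegalAccessException e) {
              // inaccessible field: skip it in this sketch
            }
          }
        }
      }
    }
    return count;
  }
}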

TestIndexWriterExceptions.java

@@ -1940,44 +1940,56 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
shouldFail.set(true);
boolean doClose = false;
+int updatingDocID = -1;
+long updatingValue = -1;
try {
boolean defaultCodecSupportsFieldUpdates = defaultCodecSupportsFieldUpdates();
for(int i=0;i<numDocs;i++) {
if (random().nextInt(10) == 7) {
boolean fieldUpdate = defaultCodecSupportsFieldUpdates && random().nextBoolean();
+int docid = docBase + i;
if (fieldUpdate) {
long value = iter;
if (VERBOSE) {
-System.out.println(" update id=" + (docBase+i) + " to value " + value);
+System.out.println(" update id=" + docid + " to value " + value);
}
+Term idTerm = new Term("id", Integer.toString(docid));
+updatingDocID = docid; // record that we're updating that document
+updatingValue = value; // and its updating value
if (random().nextBoolean()) { // update only numeric field
-w.updateNumericDocValue(new Term("id", Integer.toString(docBase + i)), "f", value);
-w.updateNumericDocValue(new Term("id", Integer.toString(docBase + i)), "cf", value * 2);
+w.updateNumericDocValue(idTerm, "f", value);
+w.updateNumericDocValue(idTerm, "cf", value * 2);
} else if (random().nextBoolean()) {
-w.updateBinaryDocValue(new Term("id", Integer.toString(docBase + i)), "bf", TestBinaryDocValuesUpdates.toBytes(value));
-w.updateBinaryDocValue(new Term("id", Integer.toString(docBase + i)), "bcf", TestBinaryDocValuesUpdates.toBytes(value * 2));
+w.updateBinaryDocValue(idTerm, "bf", TestBinaryDocValuesUpdates.toBytes(value));
+w.updateBinaryDocValue(idTerm, "bcf", TestBinaryDocValuesUpdates.toBytes(value * 2));
} else {
-w.updateNumericDocValue(new Term("id", Integer.toString(docBase + i)), "f", value);
-w.updateNumericDocValue(new Term("id", Integer.toString(docBase + i)), "cf", value * 2);
-w.updateBinaryDocValue(new Term("id", Integer.toString(docBase + i)), "bf", TestBinaryDocValuesUpdates.toBytes(value));
-w.updateBinaryDocValue(new Term("id", Integer.toString(docBase + i)), "bcf", TestBinaryDocValuesUpdates.toBytes(value * 2));
+w.updateNumericDocValue(idTerm, "f", value);
+w.updateNumericDocValue(idTerm, "cf", value * 2);
+w.updateBinaryDocValue(idTerm, "bf", TestBinaryDocValuesUpdates.toBytes(value));
+w.updateBinaryDocValue(idTerm, "bcf", TestBinaryDocValuesUpdates.toBytes(value * 2));
}
+// record that we successfully updated the document. this is
+// important when we later assert the value of the DV fields of
+// that document - since we update two fields that depend on each
+// other, could be that one of the fields successfully updates,
+// while the other fails (since we turn on random exceptions).
+// while this is supported, it makes the test raise false alarms.
+updatingDocID = -1;
+updatingValue = -1;
}
// sometimes do both deletes and updates
if (!fieldUpdate || random().nextBoolean()) {
if (VERBOSE) {
-System.out.println(" delete id=" + (docBase+i));
+System.out.println(" delete id=" + docid);
}
deleteCount++;
-w.deleteDocuments(new Term("id", ""+(docBase+i)));
+w.deleteDocuments(new Term("id", ""+docid));
}
}
}
-// Trigger writeLiveDocs so we hit fake exc:
+// Trigger writeLiveDocs + writeFieldUpdates so we hit fake exc:
IndexReader r = w.getReader(true);
// Sometimes we will make it here (we only randomly
@@ -2011,6 +2023,18 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
}
shouldFail.set(false);
+if (updatingDocID != -1) {
+// Updating this document did not succeed. Since the fields we assert on
+// depend on each other, and the update may have gone through halfway,
+// replay the update on both numeric and binary DV fields, so later
+// asserts succeed.
+Term idTerm = new Term("id", ""+updatingDocID);
+w.updateNumericDocValue(idTerm, "f", updatingValue);
+w.updateNumericDocValue(idTerm, "cf", updatingValue * 2);
+w.updateBinaryDocValue(idTerm, "bf", TestBinaryDocValuesUpdates.toBytes(updatingValue));
+w.updateBinaryDocValue(idTerm, "bcf", TestBinaryDocValuesUpdates.toBytes(updatingValue * 2));
+}
IndexReader r;
if (doClose && w != null) {
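The updatingDocID/updatingValue bookkeeping above is a small record-and-replay pattern: note which multi-field update is in flight, and if an injected exception may have left it half-applied, replay the whole update before asserting cross-field invariants. A self-contained sketch of the same idea with hypothetical names, where two plain maps stand in for the dependent DV fields:

import java.util.HashMap;
import java.util.Map;

final class PairedUpdateSketch {
  final Map<Integer, Long> f = new HashMap<>();  // stands in for the "f" field
  final Map<Integer, Long> cf = new HashMap<>(); // invariant: cf value == 2 * f value
  int updatingDocID = -1;                        // -1 means no update in flight
  long updatingValue = -1;

  void update(int docID, long value) {
    updatingDocID = docID;   // record BEFORE the fallible writes...
    updatingValue = value;
    f.put(docID, value);     // imagine either write can throw a random exception
    cf.put(docID, value * 2);
    updatingDocID = -1;      // ...and clear only once BOTH writes succeeded
    updatingValue = -1;
  }

  void replayIfInterrupted() { // call after catching the injected exception
    if (updatingDocID != -1) { // possibly half-applied: redo the update whole
      f.put(updatingDocID, updatingValue);
      cf.put(updatingDocID, updatingValue * 2);
      updatingDocID = -1;
      updatingValue = -1;
    }
  }
}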

TestIndexWriterThreadsToSegments.java

@@ -160,7 +160,7 @@ public class TestIndexWriterThreadsToSegments extends LuceneTestCase {
// LUCENE-5644: index docs w/ multiple threads but in between flushes we limit how many threads can index concurrently in the next
// iteration, and then verify that no more segments were flushed than number of threads:
public void testSegmentCountOnFlushRandom() throws Exception {
-Directory dir = newDirectory();
+Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
int maxThreadStates = TestUtil.nextInt(random(), 1, 12);

TestTieredMergePolicy.java

@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
@@ -211,4 +212,33 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
// TODO: Add more checks for other non-double setters!
}
// LUCENE-5668
public void testUnbalancedMergeSelection() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
TieredMergePolicy tmp = (TieredMergePolicy) iwc.getMergePolicy();
tmp.setFloorSegmentMB(0.00001);
// We need stable sizes for each segment:
iwc.setCodec(Codec.forName("Lucene46"));
iwc.setMergeScheduler(new SerialMergeScheduler());
iwc.setMaxBufferedDocs(100);
iwc.setRAMBufferSizeMB(-1);
IndexWriter w = new IndexWriter(dir, iwc);
for(int i=0;i<15000*RANDOM_MULTIPLIER;i++) {
Document doc = new Document();
doc.add(newTextField("id", random().nextLong() + "" + random().nextLong(), Field.Store.YES));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w, true);
// Make sure TMP always merged equal-number-of-docs segments:
for(AtomicReaderContext ctx : r.leaves()) {
int numDocs = ctx.reader().numDocs();
assertTrue("got numDocs=" + numDocs, numDocs == 100 || numDocs == 1000 || numDocs == 10000);
}
r.close();
w.close();
dir.close();
}
}
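The three accepted sizes follow directly from the setup: setMaxBufferedDocs(100) flushes equal 100-doc segments, the near-zero floor keeps them from being rounded up into the floor tier, SerialMergeScheduler makes merge selection deterministic, and TieredMergePolicy's defaults (maxMergeAtOnce and segsPerTier are both 10) combine ten equal segments at a time, so balanced selection can only ever produce segments of 100 * 10^k documents.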

TestBooleanQuery.java

@@ -352,4 +352,42 @@ public class TestBooleanQuery extends LuceneTestCase {
dir.close();
}
public void testOneClauseRewriteOptimization() throws Exception {
final float BOOST = 3.5F;
final String FIELD = "content";
final String VALUE = "foo";
Directory dir = newDirectory();
(new RandomIndexWriter(random(), dir)).shutdown();
IndexReader r = DirectoryReader.open(dir);
TermQuery expected = new TermQuery(new Term(FIELD, VALUE));
expected.setBoost(BOOST);
final int numLayers = atLeast(3);
boolean needBoost = true;
Query actual = new TermQuery(new Term(FIELD, VALUE));
for (int i = 0; i < numLayers; i++) {
if (needBoost && 0 == TestUtil.nextInt(random(),0,numLayers)) {
needBoost = false;
actual.setBoost(BOOST);
}
BooleanQuery bq = new BooleanQuery();
bq.add(actual, random().nextBoolean()
? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST);
actual = bq;
}
if (needBoost) {
actual.setBoost(BOOST);
}
assertEquals(numLayers + ": " + actual.toString(),
expected, actual.rewrite(r));
r.close();
dir.close();
}
}
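What the test pins down: rewriting a BooleanQuery that holds a single non-prohibited clause collapses it to the wrapped query, multiplying boosts through each collapsed layer. A minimal sketch of the observable behavior, assuming the mutable 4.x query API (no-arg BooleanQuery constructor, Query.setBoost):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

final class OneClauseRewriteSketch {
  static Query collapse(IndexReader reader) throws IOException {
    Query inner = new TermQuery(new Term("content", "foo"));
    inner.setBoost(3.5f);
    BooleanQuery bq = new BooleanQuery();
    bq.add(inner, BooleanClause.Occur.MUST); // a single non-prohibited clause
    bq.setBoost(2.0f);
    // Collapses to a TermQuery for content:foo with boost 2.0f * 3.5f = 7.0f;
    // nesting more single-clause BooleanQuerys keeps collapsing the same way,
    // which is exactly what the layered loop above exercises.
    return bq.rewrite(reader);
  }
}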

AbstractVisitingPrefixTreeFilter.java

@@ -222,10 +222,10 @@ public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTre
//Check for adjacent leaf (happens for indexed non-point shapes)
if (hasIndexedLeaves && cell.getLevel() != 0) {
-//If the next indexed term just adds a leaf marker ('+') to cell,
+//If the next indexed term just adds a leaf marker to cell,
// then add all of those docs
scanCell = grid.readCell(thisTerm, scanCell);
-assert curVNode.cell.isPrefixOf(scanCell);
+assert curVNode.cell.isPrefixOf(scanCell) : "missing leaf or descendants";
if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) {
visitLeaf(scanCell);
//advance
@@ -348,7 +348,6 @@ public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTre
*/
protected abstract void visitScanned(Cell cell) throws IOException;
protected void preSiblings(VNode vNode) throws IOException {
}

ContainsPrefixTreeFilter.java

@@ -83,6 +83,7 @@ public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter {
super(context, acceptDocs);
}
+//The reused value of cell.getTokenBytesNoLeaf which is always then seek()'ed to. It's used in assertions too.
BytesRef termBytes = new BytesRef();//no leaf
Cell nextCell;//see getLeafDocs
@@ -135,21 +136,24 @@
if (termsEnum == null)
return false;
termBytes = cell.getTokenBytesNoLeaf(termBytes);
+assert assertCloneTermBytes(); //assertions look at termBytes later on
return termsEnum.seekExact(termBytes);
}
+private boolean assertCloneTermBytes() {
+termBytes = BytesRef.deepCopyOf(termBytes);
+return true;
+}
private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
assert cell.getTokenBytesNoLeaf(null).equals(termBytes);
return collectDocs(acceptContains);
}
-/** Gets docs on the leaf of the given cell, _if_ there is a leaf cell, otherwise null. */
-private SmallDocSet getLeafDocs(Cell cell, Bits acceptContains) throws IOException {
-assert cell.getTokenBytesNoLeaf(null).equals(termBytes);
+private Cell lastLeaf = null;//just for assertion
+private SmallDocSet getLeafDocs(Cell leafCell, Bits acceptContains) throws IOException {
+assert leafCell.getTokenBytesNoLeaf(null).equals(termBytes);
+assert ! leafCell.equals(lastLeaf);//don't call for same leaf again
+lastLeaf = leafCell;
if (termsEnum == null)
return null;
@@ -159,8 +163,8 @@ public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter {
return null;
}
nextCell = grid.readCell(nextTerm, nextCell);
-assert cell.isPrefixOf(nextCell);
-if (nextCell.getLevel() == cell.getLevel() && nextCell.isLeaf()) {
+assert leafCell.isPrefixOf(nextCell);
+if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) {
return collectDocs(acceptContains);
} else {
return null;
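assertCloneTermBytes() above relies on a handy Java idiom: assertion-only work is placed in a method that always returns true and invoked as "assert method();", so it runs under -ea and is skipped entirely when assertions are disabled. A generic sketch of the idiom, in a hypothetical class that is not part of Lucene:

import java.util.ArrayList;
import java.util.List;

final class AssertOnlyBookkeeping {
  private List<String> seenKeys; // populated only when assertions are enabled

  private boolean recordKey(String key) {
    if (seenKeys == null) seenKeys = new ArrayList<>();
    seenKeys.add(key);
    return true; // must return true so the assert itself never fires
  }

  void process(String key) {
    assert recordKey(key); // skipped at runtime when run without -ea
    // ... real work ...
  }
}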

WithinPrefixTreeFilter.java

@@ -48,10 +48,13 @@ import java.io.IOException;
*
* @lucene.experimental
*/
-//TODO LUCENE-4869: implement faster algorithm based on filtering out false-positives of a
-// minimal query buffer by looking in a DocValues cache holding a representative
-// point of each disjoint component of a document's shape(s).
public class WithinPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter {
+//TODO LUCENE-4869: implement faster algorithm based on filtering out false-positives of a
+// minimal query buffer by looking in a DocValues cache holding a representative
+// point of each disjoint component of a document's shape(s).
+//TODO Could the recursion in allCellsIntersectQuery() be eliminated when non-fuzzy or other
+// circumstances?
private final Shape bufferedQueryShape;//if null then the whole world

TreeCellIterator.java

@@ -32,7 +32,7 @@ class TreeCellIterator extends CellIterator {
// pave the way for re-using Cell & CellIterator at a given level in the future.
private final Shape shapeFilter;//possibly null
-private final CellIterator[] iterStack;
+private final CellIterator[] iterStack;//starts at level 1
private int stackIdx;//-1 when done
private boolean descend;
@@ -40,8 +40,7 @@ class TreeCellIterator extends CellIterator {
this.shapeFilter = shapeFilter;
assert parentCell.getLevel() == 0;
iterStack = new CellIterator[detailLevel];
-CellIterator nextLevelCells = parentCell.getNextLevelCells(shapeFilter);
-iterStack[0] = nextLevelCells;
+iterStack[0] = parentCell.getNextLevelCells(shapeFilter);
stackIdx = 0;//always points to an iter (non-null)
//note: not obvious but needed to visit the first cell before trying to descend
descend = false;
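TreeCellIterator's iterStack/stackIdx/descend fields implement depth-first traversal with an explicit stack of per-level iterators instead of recursion; each slot remembers where iteration at that level left off, so descending and later resuming siblings is cheap. A generic sketch of that shape, assuming a hypothetical Node interface and detailLevel >= 1:

import java.util.Iterator;
import java.util.function.Consumer;

final class IterStackDfs {
  interface Node { Iterator<Node> children(); }

  static void visit(Node root, int detailLevel, Consumer<Node> visitor) {
    @SuppressWarnings("unchecked")
    Iterator<Node>[] iterStack = (Iterator<Node>[]) new Iterator[detailLevel];
    iterStack[0] = root.children(); // level-1 cells, like iterStack[0] above
    int stackIdx = 0;               // always points at a live iterator; -1 when done
    while (stackIdx >= 0) {
      if (!iterStack[stackIdx].hasNext()) { stackIdx--; continue; } // pop exhausted level
      Node cell = iterStack[stackIdx].next();
      visitor.accept(cell);         // visit the cell before trying to descend
      if (stackIdx + 1 < detailLevel) {
        iterStack[++stackIdx] = cell.children(); // descend one level
      }
    }
  }
}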

HttpPartitionTest.java

@@ -18,6 +18,7 @@ package org.apache.solr.cloud;
*/
import java.io.File;
+import java.net.ServerSocket;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
@@ -62,7 +63,6 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
private Map<URI,SocketProxy> proxies = new HashMap<URI,SocketProxy>();
private AtomicInteger portCounter = new AtomicInteger(0);
-private int basePort = 49900;
public HttpPartitionTest() {
super();
@@ -106,20 +106,17 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
String shardList, String solrConfigOverride, String schemaOverride)
throws Exception {
-int jettyPort = basePort + portCounter.incrementAndGet();
JettySolrRunner jetty = new JettySolrRunner(solrHome.getPath(), context,
-jettyPort, solrConfigOverride, schemaOverride, false,
+0, solrConfigOverride, schemaOverride, false,
getExtraServlets(), sslConfig, getExtraRequestFilters());
jetty.setShards(shardList);
jetty.setDataDir(getDataDir(dataDir));
// setup to proxy Http requests to this server unless it is the control
// server
-int proxyPort = basePort + portCounter.incrementAndGet();
-jetty.setProxyPort(proxyPort);
-jetty.start();
+int proxyPort = getNextAvailablePort();
+jetty.setProxyPort(proxyPort);
+jetty.start();
// create a socket proxy for the jetty server ...
SocketProxy proxy = new SocketProxy(proxyPort, jetty.getBaseUrl().toURI());
@@ -127,6 +124,14 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
return jetty;
}
+protected int getNextAvailablePort() throws Exception {
+int port = -1;
+try (ServerSocket s = new ServerSocket(0)) {
+port = s.getLocalPort();
+}
+return port;
+}
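A caveat on the bind-to-port-0 trick in getNextAvailablePort(): the OS hands back a port that is free at that instant, but the ServerSocket is closed before the proxy binds the port, so another process could in principle grab it in between. That small race is generally acceptable in tests, and asking the OS beats the old fixed basePort/portCounter scheme, which could collide with ports already in use on the machine.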
@Override
public void doTest() throws Exception {

SolrTestCaseJ4.java

@@ -1013,6 +1013,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
}
public static boolean recurseDelete(File f) {
if(f == null || !f.exists()) return false;
if (f.isDirectory()) {
for (File sub : f.listFiles()) {
if (!recurseDelete(sub)) {