From f38c82b7a2b4570e8aef8c24d3405a455e80cf7a Mon Sep 17 00:00:00 2001
From: Mark Robert Miller
Date: Tue, 4 Mar 2014 03:18:47 +0000
Subject: [PATCH 01/38] SOLR-5714: You can now use one pool of memory for the
 HDFS block cache that all collections share.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1573847 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt                              |   3 +
 .../solr/core/HdfsDirectoryFactory.java       |  66 +++++--
 .../solr/store/blockcache/BlockCache.java     |   3 +
 .../solr/store/blockcache/BlockCacheKey.java  |  21 +-
 .../store/blockcache/BlockCacheLocation.java  |   3 +
 .../solr/store/blockcache/BlockDirectory.java |  25 ++-
 .../store/blockcache/BlockDirectoryCache.java |  20 +-
 .../solr/store/blockcache/BlockLocks.java     |   3 +
 .../solr/store/blockcache/BufferStore.java    |   4 +-
 .../apache/solr/store/blockcache/Cache.java   |   3 +
 .../store/blockcache/CachedIndexOutput.java   |   3 +-
 .../blockcache/CustomBufferedIndexInput.java  |   3 +
 .../apache/solr/store/blockcache/Metrics.java |   3 +
 .../blockcache/ReusedBufferedIndexOutput.java |   3 +
 .../apache/solr/store/blockcache/Store.java   |   3 +
 .../solr/store/hdfs/HdfsFileReader.java       |   3 +
 .../solr/store/hdfs/HdfsFileWriter.java       |   3 +
 .../solr/store/hdfs/NullIndexOutput.java      |   3 +
 .../solr/collection1/conf/solrconfig-tlog.xml |   1 +
 .../solr/collection1/conf/solrconfig.xml      |   3 +-
 .../cloud/ChaosMonkeyNothingIsSafeTest.java   |   4 +-
 .../solr/cloud/ChaosMonkeySafeLeaderTest.java |   2 +-
 .../org/apache/solr/cloud/RecoveryZkTest.java |   6 +-
 .../apache/solr/cloud/hdfs/HdfsTestUtil.java  |   3 +
 .../HdfsWriteToMultipleCollectionsTest.java   | 170 ++++++++++++++++
 .../solr/store/blockcache/BlockCacheTest.java |   2 +
 .../solr/collection1/conf/solrconfig.xml      |   3 +
 .../cloud/AbstractFullDistribZkTestBase.java  | 111 +----------
 .../solr/cloud/StopableIndexingThread.java    | 185 ++++++++++++++++++
 29 files changed, 522 insertions(+), 143 deletions(-)
 create mode 100644 solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java
 create mode 100644 solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f990f84d160..04430677f41 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -88,6 +88,9 @@ New Features
 * SOLR-5183: JSON updates now support nested child documents using a
   "_childDocument_" object key. (Varun Thacker, hossman)

+* SOLR-5714: You can now use one pool of memory for the HDFS block cache
+  that all collections share.
(Mark Miller, Gregory Chanan) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java index 466246ca664..af104c02475 100644 --- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java @@ -51,6 +51,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { public static final String BLOCKCACHE_SLAB_COUNT = "solr.hdfs.blockcache.slab.count"; public static final String BLOCKCACHE_DIRECT_MEMORY_ALLOCATION = "solr.hdfs.blockcache.direct.memory.allocation"; public static final String BLOCKCACHE_ENABLED = "solr.hdfs.blockcache.enabled"; + public static final String BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global"; public static final String BLOCKCACHE_READ_ENABLED = "solr.hdfs.blockcache.read.enabled"; public static final String BLOCKCACHE_WRITE_ENABLED = "solr.hdfs.blockcache.write.enabled"; @@ -72,6 +73,8 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { private String hdfsDataDir; private String confDir; + + private static BlockCache globalBlockCache; public static Metrics metrics; private static Boolean kerberosInit; @@ -102,6 +105,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { } boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true); + boolean blockCacheGlobal = params.getBool(BLOCKCACHE_GLOBAL, false); // default to false for back compat boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED, true); boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true); @@ -117,8 +121,6 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { boolean directAllocation = params.getBool( BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true); - BlockCache blockCache; - int slabSize = numberOfBlocksPerBank * blockSize; LOG.info( "Number of slabs of block cache [{}] with direct memory allocation set to [{}]", @@ -131,22 +133,13 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128); int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128); - BufferStore.initNewBuffer(bufferSize, bufferCount); - long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank - * (long) blockSize; - try { - blockCache = new BlockCache(metrics, directAllocation, totalMemory, - slabSize, blockSize); - } catch (OutOfMemoryError e) { - throw new RuntimeException( - "The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=g -XX:+UseLargePages to your containers startup args)" - + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap," - + " your java heap size might not be large enough." 
- + " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", e); - } - Cache cache = new BlockDirectoryCache(blockCache, metrics); + BlockCache blockCache = getBlockDirectoryCache(path, numberOfBlocksPerBank, + blockSize, bankCount, directAllocation, slabSize, + bufferSize, bufferCount, blockCacheGlobal); + + Cache cache = new BlockDirectoryCache(blockCache, path, metrics); HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf); - dir = new BlockDirectory("solrcore", hdfsDirectory, cache, null, + dir = new BlockDirectory(path, hdfsDirectory, cache, null, blockCacheReadEnabled, blockCacheWriteEnabled); } else { dir = new HdfsDirectory(new Path(path), conf); @@ -164,6 +157,45 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { } return dir; } + + private BlockCache getBlockDirectoryCache(String path, + int numberOfBlocksPerBank, int blockSize, int bankCount, + boolean directAllocation, int slabSize, int bufferSize, int bufferCount, boolean staticBlockCache) { + if (!staticBlockCache) { + LOG.info("Creating new single instance HDFS BlockCache"); + return createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, directAllocation, slabSize, bufferSize, bufferCount); + } + LOG.info("Creating new global HDFS BlockCache"); + synchronized (HdfsDirectoryFactory.class) { + + if (globalBlockCache == null) { + globalBlockCache = createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, + directAllocation, slabSize, bufferSize, bufferCount); + } + } + return globalBlockCache; + } + + private BlockCache createBlockCache(int numberOfBlocksPerBank, int blockSize, + int bankCount, boolean directAllocation, int slabSize, int bufferSize, + int bufferCount) { + BufferStore.initNewBuffer(bufferSize, bufferCount); + long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank + * (long) blockSize; + + BlockCache blockCache; + try { + blockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize); + } catch (OutOfMemoryError e) { + throw new RuntimeException( + "The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=g -XX:+UseLargePages to your containers startup args)" + + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap," + + " your java heap size might not be large enough." 
+ + " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", + e); + } + return blockCache; + } @Override public boolean exists(String path) { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java index a6cdf64923a..a520c6b6c29 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java @@ -24,6 +24,9 @@ import java.util.concurrent.atomic.AtomicInteger; import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap; import com.googlecode.concurrentlinkedhashmap.EvictionListener; +/** + * @lucene.experimental + */ public class BlockCache { public static final int _128M = 134217728; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java index d0daefe0658..cf05c6936bc 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java @@ -16,12 +16,23 @@ package org.apache.solr.store.blockcache; * See the License for the specific language governing permissions and * limitations under the License. */ - +/** + * @lucene.experimental + */ public class BlockCacheKey implements Cloneable { private long block; private int file; + private String path; + public String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } + public long getBlock() { return block; } @@ -44,9 +55,10 @@ public class BlockCacheKey implements Cloneable { int result = 1; result = prime * result + (int) (block ^ (block >>> 32)); result = prime * result + file; + result = prime * result + ((path == null) ? 
0 : path.hashCode()); return result; } - + @Override public boolean equals(Object obj) { if (this == obj) return true; @@ -55,9 +67,12 @@ public class BlockCacheKey implements Cloneable { BlockCacheKey other = (BlockCacheKey) obj; if (block != other.block) return false; if (file != other.file) return false; + if (path == null) { + if (other.path != null) return false; + } else if (!path.equals(other.path)) return false; return true; } - + @Override public BlockCacheKey clone() { try { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java index 968628f058a..d2a124dda8c 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java @@ -19,6 +19,9 @@ package org.apache.solr.store.blockcache; import java.util.concurrent.atomic.AtomicBoolean; +/** + * @lucene.experimental + */ public class BlockCacheLocation { private int block; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java index 9982197a574..028fd55aecb 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java @@ -34,6 +34,9 @@ import org.apache.solr.store.hdfs.HdfsDirectory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * @lucene.experimental + */ public class BlockDirectory extends Directory { public static Logger LOG = LoggerFactory.getLogger(BlockDirectory.class); @@ -82,11 +85,11 @@ public class BlockDirectory extends Directory { private Directory directory; private int blockSize; private String dirName; - private Cache cache; + private final Cache cache; private Set blockCacheFileTypes; private final boolean blockCacheReadEnabled; private final boolean blockCacheWriteEnabled; - + public BlockDirectory(String dirName, Directory directory, Cache cache, Set blockCacheFileTypes, boolean blockCacheReadEnabled, boolean blockCacheWriteEnabled) throws IOException { @@ -265,6 +268,15 @@ public class BlockDirectory extends Directory { return dirName + "/" + name; } + /** + * Expert: mostly for tests + * + * @lucene.experimental + */ + public Cache getCache() { + return cache; + } + @Override public void copy(Directory to, String src, String dest, IOContext context) throws IOException { @@ -383,4 +395,13 @@ public class BlockDirectory extends Directory { return directory; } + + public boolean isBlockCacheReadEnabled() { + return blockCacheReadEnabled; + } + + public boolean isBlockCacheWriteEnabled() { + return blockCacheWriteEnabled; + } + } diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java index 41ca9bb4775..592831b0dad 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java @@ -21,17 +21,31 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +/** + * @lucene.experimental + */ public class BlockDirectoryCache implements Cache { - private BlockCache blockCache; + private final BlockCache blockCache; private AtomicInteger counter = new AtomicInteger(); private Map names = new ConcurrentHashMap(); + 
private String path; private Metrics metrics; - public BlockDirectoryCache(BlockCache blockCache, Metrics metrics) { + public BlockDirectoryCache(BlockCache blockCache, String path, Metrics metrics) { this.blockCache = blockCache; + this.path = path; this.metrics = metrics; } + /** + * Expert: mostly for tests + * + * @lucene.experimental + */ + public BlockCache getBlockCache() { + return blockCache; + } + @Override public void delete(String name) { names.remove(name); @@ -46,6 +60,7 @@ public class BlockDirectoryCache implements Cache { names.put(name, file); } BlockCacheKey blockCacheKey = new BlockCacheKey(); + blockCacheKey.setPath(path); blockCacheKey.setBlock(blockId); blockCacheKey.setFile(file); blockCache.store(blockCacheKey, blockOffset, buffer, offset, length); @@ -59,6 +74,7 @@ public class BlockDirectoryCache implements Cache { return false; } BlockCacheKey blockCacheKey = new BlockCacheKey(); + blockCacheKey.setPath(path); blockCacheKey.setBlock(blockId); blockCacheKey.setFile(file); boolean fetch = blockCache.fetch(blockCacheKey, b, blockOffset, off, diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java index e91ffb2ab4b..ba696506362 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java @@ -21,6 +21,9 @@ import java.util.concurrent.atomic.AtomicLongArray; import org.apache.lucene.util.LongBitSet; +/** + * @lucene.experimental + */ public class BlockLocks { private AtomicLongArray bits; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java b/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java index 3e637d59d0b..f54b2757041 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java @@ -22,7 +22,9 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; - +/** + * @lucene.experimental + */ public class BufferStore implements Store { private static final Store EMPTY = new Store() { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java b/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java index 7e70ad0a775..dafa4ffcd9d 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java @@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache; * limitations under the License. */ +/** + * @lucene.experimental + */ public interface Cache { /** diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java b/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java index 6e3c92ee1ac..858214cf83b 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java @@ -21,10 +21,11 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; -/* +/** * Cache the blocks as they are written. The cache file name is the name of * the file until the file is closed, at which point the cache is updated * to include the last modified date (which is unknown until that point). 
+ * @lucene.experimental */ public class CachedIndexOutput extends ReusedBufferedIndexOutput { private final BlockDirectory directory; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java b/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java index be8f260b902..aa79fb99804 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java @@ -23,6 +23,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +/** + * @lucene.experimental + */ public abstract class CustomBufferedIndexInput extends IndexInput { public static final int BUFFER_SIZE = 32768; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java b/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java index fce1b9d9a73..052e70442f8 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java @@ -29,6 +29,9 @@ import org.apache.hadoop.metrics.MetricsUtil; import org.apache.hadoop.metrics.Updater; import org.apache.hadoop.metrics.jvm.JvmMetrics; +/** + * @lucene.experimental + */ public class Metrics implements Updater { public static class MethodCall { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java b/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java index 6b12c982e44..92018fce7af 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java @@ -21,6 +21,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; +/** + * @lucene.experimental + */ public abstract class ReusedBufferedIndexOutput extends IndexOutput { public static final int BUFFER_SIZE = 1024; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/Store.java b/solr/core/src/java/org/apache/solr/store/blockcache/Store.java index 3a491b3db83..8fb4e48cf38 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/Store.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/Store.java @@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache; * limitations under the License. 
*/ +/** + * @lucene.experimental + */ public interface Store { byte[] takeBuffer(int bufferSize); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java index 8a537935ea0..0294496c097 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java @@ -28,6 +28,9 @@ import org.apache.lucene.store.DataInput; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * @lucene.experimental + */ public class HdfsFileReader extends DataInput { public static Logger LOG = LoggerFactory.getLogger(HdfsFileReader.class); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java index 459a6d13fe9..d73e353a71e 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java @@ -32,6 +32,9 @@ import org.apache.lucene.store.DataOutput; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * @lucene.experimental + */ public class HdfsFileWriter extends DataOutput implements Closeable { public static Logger LOG = LoggerFactory.getLogger(HdfsFileWriter.class); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java b/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java index 044687c41d4..942dfd73f4f 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java @@ -21,6 +21,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; +/** + * @lucene.experimental + */ public class NullIndexOutput extends IndexOutput { private long pos; diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml index 22c5b3ff57b..95a57ab23dd 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml @@ -28,6 +28,7 @@ ${solr.hdfs.blockcache.blocksperbank:1024} ${solr.hdfs.home:} ${solr.hdfs.confdir:} + ${solr.hdfs.blockcache.global:false} ${solr.data.dir:} diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index d2413b09654..5fe25d7db76 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -46,7 +46,8 @@ 3000000 4000000 ${solr.hdfs.home:} - ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.global:false} ${tests.luceneMatchVersion:LUCENE_CURRENT} diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java index 8650f216431..ba0f0817843 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java @@ -131,7 +131,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase int threadCount = 1; int i = 0; for (i = 0; i < threadCount; i++) { - StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true); + StopableIndexingThread indexThread = new 
StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true); threads.add(indexThread); indexThread.start(); } @@ -270,7 +270,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase public FullThrottleStopableIndexingThread(List clients, String id, boolean doDeletes) { - super(id, doDeletes); + super(controlClient, cloudClient, id, doDeletes); setName("FullThrottleStopableIndexingThread"); setDaemon(true); this.clients = clients; diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java index 98353476ff0..19e40bfd2db 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java @@ -108,7 +108,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase { List threads = new ArrayList(); int threadCount = 2; for (int i = 0; i < threadCount; i++) { - StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true); + StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true); threads.add(indexThread); indexThread.start(); } diff --git a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java index 6bb1328cfbd..a15a021706a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java @@ -66,10 +66,10 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase { int maxDoc = maxDocList[random().nextInt(maxDocList.length - 1)]; - indexThread = new StopableIndexingThread("1", true, maxDoc); + indexThread = new StopableIndexingThread(controlClient, cloudClient, "1", true, maxDoc); indexThread.start(); - indexThread2 = new StopableIndexingThread("2", true, maxDoc); + indexThread2 = new StopableIndexingThread(controlClient, cloudClient, "2", true, maxDoc); indexThread2.start(); @@ -100,7 +100,7 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase { Thread.sleep(1000); - waitForThingsToLevelOut(45); + waitForThingsToLevelOut(90); Thread.sleep(2000); diff --git a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java index 1788aa715ad..6dae9b555ce 100644 --- a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java +++ b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java @@ -64,6 +64,8 @@ public class HdfsTestUtil { System.setProperty("solr.hdfs.home", "/solr_hdfs_home"); + System.setProperty("solr.hdfs.blockcache.global", Boolean.toString(LuceneTestCase.random().nextBoolean())); + final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null); dfsCluster.waitActive(); @@ -92,6 +94,7 @@ public class HdfsTestUtil { System.clearProperty("test.build.data"); System.clearProperty("test.cache.data"); System.clearProperty("solr.hdfs.home"); + System.clearProperty("solr.hdfs.blockcache.global"); if (dfsCluster != null) { timers.remove(dfsCluster); dfsCluster.shutdown(); diff --git a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java new file mode 100644 index 00000000000..5a737826566 --- /dev/null +++ 
b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud.hdfs; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.NRTCachingDirectory; +import org.apache.lucene.util.LuceneTestCase.Nightly; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.cloud.BasicDistributedZkTest; +import org.apache.solr.cloud.StopableIndexingThread; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.HdfsDirectoryFactory; +import org.apache.solr.core.SolrCore; +import org.apache.solr.servlet.SolrDispatchFilter; +import org.apache.solr.store.blockcache.BlockCache; +import org.apache.solr.store.blockcache.BlockDirectory; +import org.apache.solr.store.blockcache.BlockDirectoryCache; +import org.apache.solr.store.blockcache.Cache; +import org.apache.solr.util.RefCounted; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; + +@Slow +@Nightly +@ThreadLeakScope(Scope.NONE) // hdfs client currently leaks thread(s) +public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest { + private static final String SOLR_HDFS_HOME = "solr.hdfs.home"; + private static final String SOLR_HDFS_BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global"; + private static final String ACOLLECTION = "acollection"; + private static MiniDFSCluster dfsCluster; + + @BeforeClass + public static void setupClass() throws Exception { + schemaString = "schema15.xml"; // we need a string id + dfsCluster = HdfsTestUtil.setupClass(new File(TEMP_DIR, + HdfsBasicDistributedZk2Test.class.getName() + "_" + + System.currentTimeMillis()).getAbsolutePath()); + System.setProperty(SOLR_HDFS_HOME, dfsCluster.getURI().toString() + "/solr"); + } + + @AfterClass + public static void teardownClass() throws Exception { + HdfsTestUtil.teardownClass(dfsCluster); + System.clearProperty(SOLR_HDFS_HOME); + dfsCluster = null; + } + + @Override + protected String getDataDir(String dataDir) throws IOException { + return HdfsTestUtil.getDataDir(dfsCluster, dataDir); + } + + public HdfsWriteToMultipleCollectionsTest() { + super(); + sliceCount = 1; + shardCount = 3; + } + + protected String getSolrXml() { + return 
"solr-no-core.xml"; + } + + @Override + public void doTest() throws Exception { + int docCount = random().nextInt(1313) + 1; + int cnt = random().nextInt(4) + 1; + for (int i = 0; i < cnt; i++) { + createCollection(ACOLLECTION + i, 2, 2, 9); + } + for (int i = 0; i < cnt; i++) { + waitForRecoveriesToFinish(ACOLLECTION + i, false); + } + List cloudServers = new ArrayList(); + List threads = new ArrayList(); + for (int i = 0; i < cnt; i++) { + CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress()); + server.setDefaultCollection(ACOLLECTION + i); + cloudServers.add(server); + StopableIndexingThread indexThread = new StopableIndexingThread(null, server, "1", true, docCount); + threads.add(indexThread); + indexThread.start(); + } + + int addCnt = 0; + for (StopableIndexingThread thread : threads) { + thread.join(); + addCnt += thread.getNumAdds() - thread.getNumDeletes(); + } + + long collectionsCount = 0; + for (CloudSolrServer server : cloudServers) { + server.commit(); + collectionsCount += server.query(new SolrQuery("*:*")).getResults().getNumFound(); + } + + for (CloudSolrServer server : cloudServers) { + server.shutdown(); + } + + assertEquals(addCnt, collectionsCount); + + BlockCache lastBlockCache = null; + // assert that we are using the block directory and that write and read caching are being used + for (JettySolrRunner jetty : jettys) { + CoreContainer cores = ((SolrDispatchFilter) jetty.getDispatchFilter() + .getFilter()).getCores(); + Collection solrCores = cores.getCores(); + for (SolrCore core : solrCores) { + if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName() + .startsWith(ACOLLECTION)) { + assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory); + RefCounted iwRef = core.getUpdateHandler() + .getSolrCoreState().getIndexWriter(core); + try { + IndexWriter iw = iwRef.get(); + NRTCachingDirectory directory = (NRTCachingDirectory) iw + .getDirectory(); + BlockDirectory blockDirectory = (BlockDirectory) directory + .getDelegate(); + assertTrue(blockDirectory.isBlockCacheReadEnabled()); + assertTrue(blockDirectory.isBlockCacheWriteEnabled()); + Cache cache = blockDirectory.getCache(); + // we know its a BlockDirectoryCache, but future proof + assertTrue(cache instanceof BlockDirectoryCache); + BlockCache blockCache = ((BlockDirectoryCache) cache) + .getBlockCache(); + if (lastBlockCache != null) { + if (Boolean.getBoolean(SOLR_HDFS_BLOCKCACHE_GLOBAL)) { + assertEquals(lastBlockCache, blockCache); + } else { + assertNotSame(lastBlockCache, blockCache); + } + } + lastBlockCache = blockCache; + } finally { + iwRef.decref(); + } + } + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java b/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java index 70fd813aead..bc5e75c844a 100644 --- a/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java +++ b/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java @@ -51,6 +51,7 @@ public class BlockCacheTest extends LuceneTestCase { int file = 0; blockCacheKey.setBlock(block); blockCacheKey.setFile(file); + blockCacheKey.setPath("/"); if (blockCache.fetch(blockCacheKey, buffer)) { hitsInCache.incrementAndGet(); @@ -91,6 +92,7 @@ public class BlockCacheTest extends LuceneTestCase { BlockCacheKey blockCacheKey = new BlockCacheKey(); blockCacheKey.setBlock(0); blockCacheKey.setFile(0); + blockCacheKey.setPath("/"); byte[] newData = new byte[blockSize*3]; byte[] testData = testData(random, blockSize, newData); 
diff --git a/solr/example/solr/collection1/conf/solrconfig.xml b/solr/example/solr/collection1/conf/solrconfig.xml index 3126c21d74d..192cbcf24bc 100755 --- a/solr/example/solr/collection1/conf/solrconfig.xml +++ b/solr/example/solr/collection1/conf/solrconfig.xml @@ -129,6 +129,9 @@ ${solr.hdfs.confdir:} ${solr.hdfs.blockcache.enabled:true} + + ${solr.hdfs.blockcache.global:true} diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java index e3f193bbe71..66a3adf468a 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java @@ -1428,122 +1428,13 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes return rsp; } - abstract class StopableThread extends Thread { + static abstract class StopableThread extends Thread { public StopableThread(String name) { super(name); } public abstract void safeStop(); } - class StopableIndexingThread extends StopableThread { - private volatile boolean stop = false; - protected final String id; - protected final List deletes = new ArrayList(); - protected Set addFails = new HashSet(); - protected Set deleteFails = new HashSet(); - protected boolean doDeletes; - private int numCycles; - - public StopableIndexingThread(String id, boolean doDeletes) { - this(id, doDeletes, -1); - } - - public StopableIndexingThread(String id, boolean doDeletes, int numCycles) { - super("StopableIndexingThread"); - this.id = id; - this.doDeletes = doDeletes; - this.numCycles = numCycles; - setDaemon(true); - } - - @Override - public void run() { - int i = 0; - int numDone = 0; - int numDeletes = 0; - int numAdds = 0; - - while (true && !stop) { - if (numCycles != -1) { - if (numDone > numCycles) { - break; - } - } - ++numDone; - String id = this.id + "-" + i; - ++i; - boolean addFailed = false; - - if (doDeletes && random().nextBoolean() && deletes.size() > 0) { - String delete = deletes.remove(0); - try { - numDeletes++; - UpdateRequest req = new UpdateRequest(); - req.deleteById(delete); - req.setParam("CONTROL", "TRUE"); - req.process(controlClient); - - cloudClient.deleteById(delete); - } catch (Exception e) { - System.err.println("REQUEST FAILED:"); - e.printStackTrace(); - if (e instanceof SolrServerException) { - System.err.println("ROOT CAUSE:"); - ((SolrServerException) e).getRootCause().printStackTrace(); - } - deleteFails.add(id); - } - } - - try { - numAdds++; - indexr("id", id, i1, 50, t1, - "to come to the aid of their country."); - } catch (Exception e) { - addFailed = true; - System.err.println("REQUEST FAILED:"); - e.printStackTrace(); - if (e instanceof SolrServerException) { - System.err.println("ROOT CAUSE:"); - ((SolrServerException) e).getRootCause().printStackTrace(); - } - addFails.add(id); - } - - if (!addFailed && doDeletes && random().nextBoolean()) { - deletes.add(id); - } - - try { - Thread.currentThread().sleep(random().nextInt(100)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails" - + " deletes:" + numDeletes); - } - - @Override - public void safeStop() { - stop = true; - } - - public Set getAddFails() { - return addFails; - } - - public Set getDeleteFails() { - return deleteFails; - } - - public int getFailCount() { - 
return addFails.size() + deleteFails.size(); - } - - }; - class StopableSearchThread extends StopableThread { private volatile boolean stop = false; protected final AtomicInteger queryFails = new AtomicInteger(); diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java b/solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java new file mode 100644 index 00000000000..8446f086849 --- /dev/null +++ b/solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java @@ -0,0 +1,185 @@ +package org.apache.solr.cloud; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.common.SolrInputDocument; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class StopableIndexingThread extends AbstractFullDistribZkTestBase.StopableThread { + private static String t1 = "a_t"; + private static String i1 = "a_si"; + private volatile boolean stop = false; + protected final String id; + protected final List deletes = new ArrayList(); + protected Set addFails = new HashSet(); + protected Set deleteFails = new HashSet(); + protected boolean doDeletes; + private int numCycles; + private SolrServer controlClient; + private SolrServer cloudClient; + private int numDeletes; + private int numAdds; + + public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes) { + this(controlClient, cloudClient, id, doDeletes, -1); + } + + public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes, int numCycles) { + super("StopableIndexingThread"); + this.controlClient = controlClient; + this.cloudClient = cloudClient; + this.id = id; + this.doDeletes = doDeletes; + this.numCycles = numCycles; + setDaemon(true); + } + + @Override + public void run() { + int i = 0; + int numDone = 0; + numDeletes = 0; + numAdds = 0; + + while (true && !stop) { + if (numCycles != -1) { + if (numDone > numCycles) { + break; + } + } + ++numDone; + String id = this.id + "-" + i; + ++i; + boolean addFailed = false; + + if (doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean() && deletes.size() > 0) { + String delete = deletes.remove(0); + try { + numDeletes++; + if (controlClient != null) { + UpdateRequest req = new UpdateRequest(); + req.deleteById(delete); + req.setParam("CONTROL", "TRUE"); + req.process(controlClient); + } + + cloudClient.deleteById(delete); + } catch (Exception e) { + System.err.println("REQUEST FAILED:"); + e.printStackTrace(); + 
if (e instanceof SolrServerException) { + System.err.println("ROOT CAUSE:"); + ((SolrServerException) e).getRootCause().printStackTrace(); + } + deleteFails.add(id); + } + } + + try { + numAdds++; + indexr("id", id, i1, 50, t1, + "to come to the aid of their country."); + } catch (Exception e) { + addFailed = true; + System.err.println("REQUEST FAILED:"); + e.printStackTrace(); + if (e instanceof SolrServerException) { + System.err.println("ROOT CAUSE:"); + ((SolrServerException) e).getRootCause().printStackTrace(); + } + addFails.add(id); + } + + if (!addFailed && doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean()) { + deletes.add(id); + } + + try { + Thread.currentThread().sleep(AbstractFullDistribZkTestBase.random().nextInt(100)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails" + + " deletes:" + numDeletes); + } + + @Override + public void safeStop() { + stop = true; + } + + public Set getAddFails() { + return addFails; + } + + public Set getDeleteFails() { + return deleteFails; + } + + public int getFailCount() { + return addFails.size() + deleteFails.size(); + } + + protected void addFields(SolrInputDocument doc, Object... fields) { + for (int i = 0; i < fields.length; i += 2) { + doc.addField((String) (fields[i]), fields[i + 1]); + } + } + + protected void indexr(Object... fields) throws Exception { + SolrInputDocument doc = new SolrInputDocument(); + addFields(doc, fields); + addFields(doc, "rnd_b", true); + indexDoc(doc); + } + + protected void indexDoc(SolrInputDocument doc) throws IOException, + SolrServerException { + + if (controlClient != null) { + UpdateRequest req = new UpdateRequest(); + req.add(doc); + req.setParam("CONTROL", "TRUE"); + req.process(controlClient); + } + + + UpdateRequest ureq = new UpdateRequest(); + ureq.add(doc); + ureq.process(cloudClient); + } + + public int getNumDeletes() { + return numDeletes; + } + + public int getNumAdds() { + return numAdds; + } + +} \ No newline at end of file From 3064419624fd10622da2844399d4009fc2e62e00 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 4 Mar 2014 17:04:48 +0000 Subject: [PATCH 02/38] LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574135 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/analysis/hunspell/Dictionary.java | 171 +++++++++++++++++- .../lucene/analysis/hunspell/Stemmer.java | 45 ++++- .../lucene/analysis/hunspell/TestConv.java | 36 ++++ .../analysis/hunspell/TestDictionary.java | 57 +++++- .../hunspell/TestHunspellStemFilter.java | 18 +- .../lucene/analysis/hunspell/TestIgnore.java | 36 ++++ .../apache/lucene/analysis/hunspell/conv.aff | 16 ++ .../apache/lucene/analysis/hunspell/conv.dic | 2 + .../lucene/analysis/hunspell/ignore.aff | 6 + .../lucene/analysis/hunspell/ignore.dic | 3 + 11 files changed, 373 insertions(+), 20 deletions(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff 
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 30774b5b579..cca653dc97a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -89,6 +89,9 @@ New Features * LUCENE-5485: Add circumfix support to HunspellStemFilter. (Robert Muir) +* LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter. + (Robert Muir) + API Changes * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 5242f5c77a5..1d3e60b970c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -21,14 +21,17 @@ import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.OfflineSorter; import org.apache.lucene.util.OfflineSorter.ByteSequencesReader; import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter; import org.apache.lucene.util.fst.Builder; +import org.apache.lucene.util.fst.CharSequenceOutputs; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.IntSequenceOutputs; +import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.Util; import java.io.BufferedInputStream; @@ -67,6 +70,9 @@ public class Dictionary { private static final String FLAG_KEY = "FLAG"; private static final String COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES"; private static final String CIRCUMFIX_KEY = "CIRCUMFIX"; + private static final String IGNORE_KEY = "IGNORE"; + private static final String ICONV_KEY = "ICONV"; + private static final String OCONV_KEY = "OCONV"; private static final String NUM_FLAG_TYPE = "num"; private static final String UTF8_FLAG_TYPE = "UTF-8"; @@ -110,6 +116,16 @@ public class Dictionary { int circumfix = -1; // circumfix flag, or -1 if one is not defined + // ignored characters (dictionary, affix, inputs) + private char[] ignore; + + // FSTs used for ICONV/OCONV, output ord pointing to replacement text + FST iconv; + FST oconv; + + boolean needsInputCleaning; + boolean needsOutputCleaning; + /** * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix * and dictionary files. 
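The hunk below adds parsing for the new IGNORE, ICONV, and OCONV directives. For orientation, these declarations in a Hunspell affix file look roughly like the following (values invented for illustration; the conv.aff/ignore.aff test fixtures added by this patch are not shown here):

    IGNORE aeiou
    ICONV 2
    ICONV A a
    ICONV B b

IGNORE names characters to strip from input, and the ICONV/OCONV header line carries the number of conversion pairs that follow — the num value read by parseConversions below.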
@@ -136,6 +152,8 @@ public class Dictionary { */ public Dictionary(InputStream affix, List dictionaries, boolean ignoreCase) throws IOException, ParseException { this.ignoreCase = ignoreCase; + this.needsInputCleaning = ignoreCase; + this.needsOutputCleaning = false; // set if we have an OCONV // hungarian has thousands of AF before the SET, so a 32k buffer is needed BufferedInputStream buffered = new BufferedInputStream(affix, 32768); buffered.mark(32768); @@ -249,6 +267,29 @@ public class Dictionary { throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber()); } circumfix = flagParsingStrategy.parseFlag(parts[1]); + } else if (line.startsWith(IGNORE_KEY)) { + String parts[] = line.split("\\s+"); + if (parts.length != 2) { + throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber()); + } + ignore = parts[1].toCharArray(); + Arrays.sort(ignore); + needsInputCleaning = true; + } else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) { + String parts[] = line.split("\\s+"); + String type = parts[0]; + if (parts.length != 2) { + throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber()); + } + int num = Integer.parseInt(parts[1]); + FST res = parseConversions(reader, num); + if (type.equals("ICONV")) { + iconv = res; + needsInputCleaning |= iconv != null; + } else { + oconv = res; + needsOutputCleaning |= oconv != null; + } } } @@ -291,6 +332,7 @@ public class Dictionary { Map seenPatterns) throws IOException, ParseException { BytesRef scratch = new BytesRef(); + StringBuilder sb = new StringBuilder(); String args[] = header.split("\\s+"); boolean crossProduct = args[2].equals("Y"); @@ -300,9 +342,6 @@ public class Dictionary { ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3); for (int i = 0; i < numLines; i++) { - if (currentAffix > Short.MAX_VALUE) { - throw new UnsupportedOperationException("Too many affixes, please report this to dev@lucene.apache.org"); - } assert affixWriter.getPosition() == currentAffix << 3; String line = reader.readLine(); String ruleArgs[] = line.split("\\s+"); @@ -345,6 +384,9 @@ public class Dictionary { Integer patternIndex = seenPatterns.get(regex); if (patternIndex == null) { patternIndex = patterns.size(); + if (patternIndex > Short.MAX_VALUE) { + throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org"); + } seenPatterns.put(regex, patternIndex); Pattern pattern = Pattern.compile(regex); patterns.add(pattern); @@ -355,6 +397,8 @@ public class Dictionary { if (stripOrd < 0) { // already exists in our hash stripOrd = (-stripOrd)-1; + } else if (stripOrd > Character.MAX_VALUE) { + throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org"); } if (appendFlags == null) { @@ -368,7 +412,7 @@ public class Dictionary { appendFlagsOrd = (-appendFlagsOrd)-1; } else if (appendFlagsOrd > Short.MAX_VALUE) { // this limit is probably flexible, but its a good sanity check too - throw new UnsupportedOperationException("Too many unique flags, please report this to dev@lucene.apache.org"); + throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org"); } affixWriter.writeShort((short)flag); @@ -378,6 +422,11 @@ public class Dictionary { affixWriter.writeShort((short)patternOrd); affixWriter.writeShort((short)appendFlagsOrd); + if (needsInputCleaning) { + CharSequence cleaned = 
cleanInput(affixArg, sb); + affixArg = cleaned.toString(); + } + List list = affixes.get(affixArg); if (list == null) { list = new ArrayList(); @@ -388,6 +437,31 @@ public class Dictionary { currentAffix++; } } + + private FST parseConversions(LineNumberReader reader, int num) throws IOException, ParseException { + Map mappings = new TreeMap<>(); + + for (int i = 0; i < num; i++) { + String line = reader.readLine(); + String parts[] = line.split("\\s+"); + if (parts.length != 3) { + throw new ParseException("invalid syntax: " + line, reader.getLineNumber()); + } + if (mappings.put(parts[1], parts[2]) != null) { + throw new IllegalStateException("duplicate mapping specified for: " + parts[1]); + } + } + + Outputs outputs = CharSequenceOutputs.getSingleton(); + Builder builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs); + IntsRef scratchInts = new IntsRef(); + for (Map.Entry entry : mappings.entrySet()) { + Util.toUTF16(entry.getKey(), scratchInts); + builder.add(scratchInts, new CharsRef(entry.getValue())); + } + + return builder.finish(); + } /** * Parses the encoding specified in the affix file readable through the provided InputStream @@ -485,6 +559,8 @@ public class Dictionary { BytesRef flagsScratch = new BytesRef(); IntsRef scratchInts = new IntsRef(); + StringBuilder sb = new StringBuilder(); + File unsorted = File.createTempFile("unsorted", "dat", tempDir); try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) { for (InputStream dictionary : dictionaries) { @@ -492,16 +568,19 @@ public class Dictionary { String line = lines.readLine(); // first line is number of entries (approximately, sometimes) while ((line = lines.readLine()) != null) { - if (ignoreCase) { + if (needsInputCleaning) { int flagSep = line.lastIndexOf('/'); if (flagSep == -1) { - writer.write(line.toLowerCase(Locale.ROOT).getBytes(IOUtils.CHARSET_UTF_8)); + CharSequence cleansed = cleanInput(line, sb); + writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8)); } else { - StringBuilder sb = new StringBuilder(); - sb.append(line.substring(0, flagSep).toLowerCase(Locale.ROOT)); - if (flagSep < line.length()) { - sb.append(line.substring(flagSep, line.length())); + String text = line.substring(0, flagSep); + CharSequence cleansed = cleanInput(text, sb); + if (cleansed != sb) { + sb.setLength(0); + sb.append(cleansed); } + sb.append(line.substring(flagSep)); writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8)); } } else { @@ -761,4 +840,76 @@ public class Dictionary { static boolean hasFlag(char flags[], char flag) { return Arrays.binarySearch(flags, flag) >= 0; } + + CharSequence cleanInput(CharSequence input, StringBuilder reuse) { + reuse.setLength(0); + + for (int i = 0; i < input.length(); i++) { + char ch = input.charAt(i); + + if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0) { + continue; + } + + if (ignoreCase && iconv == null) { + // if we have no input conversion mappings, do this on-the-fly + ch = Character.toLowerCase(ch); + } + + reuse.append(ch); + } + + if (iconv != null) { + try { + applyMappings(iconv, reuse); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + if (ignoreCase) { + for (int i = 0; i < reuse.length(); i++) { + reuse.setCharAt(i, Character.toLowerCase(reuse.charAt(i))); + } + } + } + + return reuse; + } + + // TODO: this could be more efficient! 
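+ // Greedy longest-match rewrite: from each position, follow the conversion FST
+ // as far as the input allows, remember the longest key that ended in a final
+ // state, splice in that key's output, then resume scanning just past the
+ // replacement; shorter matches and unmatched characters are left as-is.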
+ static void applyMappings(FST fst, StringBuilder sb) throws IOException { + final FST.BytesReader bytesReader = fst.getBytesReader(); + final FST.Arc firstArc = fst.getFirstArc(new FST.Arc()); + final CharsRef NO_OUTPUT = fst.outputs.getNoOutput(); + + // temporary stuff + final FST.Arc arc = new FST.Arc<>(); + int longestMatch; + CharsRef longestOutput; + + for (int i = 0; i < sb.length(); i++) { + arc.copyFrom(firstArc); + CharsRef output = NO_OUTPUT; + longestMatch = -1; + longestOutput = null; + + for (int j = i; j < sb.length(); j++) { + char ch = sb.charAt(j); + if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { + break; + } else { + output = fst.outputs.add(output, arc.output); + } + if (arc.isFinal()) { + longestOutput = fst.outputs.add(output, arc.nextFinalOutput); + longestMatch = j; + } + } + + if (longestMatch >= 0) { + sb.delete(i, longestMatch+1); + sb.insert(i, longestOutput); + i += (longestOutput.length - 1); + } + } + } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index 18e62c597e4..ff6cc0ae802 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell; * limitations under the License. */ +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -24,8 +25,8 @@ import java.util.List; import java.util.regex.Pattern; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.analysis.util.CharacterUtils; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IntsRef; @@ -40,8 +41,11 @@ final class Stemmer { private final BytesRef scratch = new BytesRef(); private final StringBuilder segment = new StringBuilder(); private final ByteArrayDataInput affixReader; - private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT); - + + // used for normalization + private final StringBuilder scratchSegment = new StringBuilder(); + private char scratchBuffer[] = new char[32]; + /** * Constructs a new Stemmer which will use the provided Dictionary to create its stems. * @@ -68,17 +72,25 @@ final class Stemmer { * @param word Word to find the stems for * @return List of stems for the word */ - public List stem(char word[], int length) { - if (dictionary.ignoreCase) { - charUtils.toLowerCase(word, 0, length); + public List stem(char word[], int length) { + + if (dictionary.needsInputCleaning) { + scratchSegment.setLength(0); + scratchSegment.append(word, 0, length); + CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment); + scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length()); + length = segment.length(); + segment.getChars(0, length, scratchBuffer, 0); + word = scratchBuffer; } + List stems = new ArrayList(); IntsRef forms = dictionary.lookupWord(word, 0, length); if (forms != null) { // TODO: some forms should not be added, e.g. ONLYINCOMPOUND // just because it exists, does not make it valid... 
for (int i = 0; i < forms.length; i++) { - stems.add(new CharsRef(word, 0, length)); + stems.add(newStem(word, length)); } } stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false)); @@ -106,6 +118,23 @@ final class Stemmer { } return deduped; } + + private CharsRef newStem(char buffer[], int length) { + if (dictionary.needsOutputCleaning) { + scratchSegment.setLength(0); + scratchSegment.append(buffer, 0, length); + try { + Dictionary.applyMappings(dictionary.oconv, scratchSegment); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + char cleaned[] = new char[scratchSegment.length()]; + scratchSegment.getChars(0, cleaned.length, cleaned, 0); + return new CharsRef(cleaned, 0, cleaned.length); + } else { + return new CharsRef(buffer, 0, length); + } + } // ================================================= Helper Methods ================================================ @@ -292,7 +321,7 @@ final class Stemmer { continue; } } - stems.add(new CharsRef(strippedWord, 0, length)); + stems.add(newStem(strippedWord, length)); } } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java new file mode 100644 index 00000000000..c72fd3ff704 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java @@ -0,0 +1,36 @@ +package org.apache.lucene.analysis.hunspell; + +import org.junit.BeforeClass; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestConv extends StemmerTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + init("conv.aff", "conv.dic"); + } + + public void testConversion() { + assertStemsTo("drink", "drInk"); + assertStemsTo("drInk", "drInk"); + assertStemsTo("drInkAble", "drInk"); + assertStemsTo("drInkABle", "drInk"); + assertStemsTo("drinkABle", "drInk"); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index ad4f257e628..5d7682e88c0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -22,10 +22,15 @@ import java.io.IOException; import java.io.InputStream; import java.text.ParseException; -import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.fst.Builder; +import org.apache.lucene.util.fst.CharSequenceOutputs; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.Outputs; +import org.apache.lucene.util.fst.Util; public class TestDictionary extends LuceneTestCase { @@ -123,4 +128,54 @@ public class TestDictionary extends LuceneTestCase { assertTrue(affixStream.isClosed()); assertTrue(dictStream.isClosed()); } + + + + public void testReplacements() throws Exception { + Outputs outputs = CharSequenceOutputs.getSingleton(); + Builder builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs); + IntsRef scratchInts = new IntsRef(); + + // a -> b + Util.toUTF16("a", scratchInts); + builder.add(scratchInts, new CharsRef("b")); + + // ab -> c + Util.toUTF16("ab", scratchInts); + builder.add(scratchInts, new CharsRef("c")); + + // c -> de + Util.toUTF16("c", scratchInts); + builder.add(scratchInts, new CharsRef("de")); + + // def -> gh + Util.toUTF16("def", scratchInts); + builder.add(scratchInts, new CharsRef("gh")); + + FST fst = builder.finish(); + + StringBuilder sb = new StringBuilder("atestanother"); + Dictionary.applyMappings(fst, sb); + assertEquals("btestbnother", sb.toString()); + + sb = new StringBuilder("abtestanother"); + Dictionary.applyMappings(fst, sb); + assertEquals("ctestbnother", sb.toString()); + + sb = new StringBuilder("atestabnother"); + Dictionary.applyMappings(fst, sb); + assertEquals("btestcnother", sb.toString()); + + sb = new StringBuilder("abtestabnother"); + Dictionary.applyMappings(fst, sb); + assertEquals("ctestcnother", sb.toString()); + + sb = new StringBuilder("abtestabcnother"); + Dictionary.applyMappings(fst, sb); + assertEquals("ctestcdenother", sb.toString()); + + sb = new StringBuilder("defdefdefc"); + Dictionary.applyMappings(fst, sb); + assertEquals("ghghghde", sb.toString()); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java index 3069c0ab1e2..f42afcfa9cc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell; import java.io.IOException; import 
java.io.InputStream; import java.util.Arrays; +import java.util.Collections; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -30,7 +31,6 @@ import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.analysis.hunspell.HunspellStemFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -94,4 +94,20 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase { }; checkOneTerm(a, "", ""); } + + public void testIgnoreCaseNoSideEffects() throws Exception { + final Dictionary d; + try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff"); + InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) { + d = new Dictionary(affixStream, Collections.singletonList(dictStream), true); + } + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d)); + } + }; + checkOneTerm(a, "NoChAnGy", "NoChAnGy"); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java new file mode 100644 index 00000000000..723eca94d3d --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java @@ -0,0 +1,36 @@ +package org.apache.lucene.analysis.hunspell; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.junit.BeforeClass; + +public class TestIgnore extends StemmerTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + init("ignore.aff", "ignore.dic"); + } + + public void testExamples() { + assertStemsTo("drink", "drink"); + assertStemsTo("drinkable", "drink"); + assertStemsTo("dr'ink-able", "drink"); + assertStemsTo("drank-able", "drank"); + assertStemsTo("'-'-'-"); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff new file mode 100644 index 00000000000..e860a87b7e9 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff @@ -0,0 +1,16 @@ +SET UTF-8 + +ICONV 4 +ICONV A a +ICONV B b +ICONV C c +ICONV I i + +OCONV 4 +OCONV a A +OCONV b B +OCONV c C +OCONV i I + +SFX X Y 1 +SFX X 0 able . 
+ABLE \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic new file mode 100644 index 00000000000..6b68dc80cc0 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic @@ -0,0 +1,2 @@ +1 +drink/X [VERB] diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff new file mode 100644 index 00000000000..65c4683fc0a --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff @@ -0,0 +1,6 @@ +SET UTF-8 + +IGNORE '- + +SFX X Y 1 +SFX X 0 able . +ABLE \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic new file mode 100644 index 00000000000..9ae92058f54 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic @@ -0,0 +1,3 @@ +1 +drink/X [VERB] +dr-ank/X [VERB] \ No newline at end of file From b670831559cd4de9bbfecd1bdaf387cd52b6412c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 4 Mar 2014 17:51:20 +0000 Subject: [PATCH 03/38] SOLR-2934: increase buffer size for recent dictionaries with large amounts of AF/AM lines before charset git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574158 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/analysis/hunspell/Dictionary.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 1d3e60b970c..974d24185db 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -154,9 +154,11 @@ public class Dictionary { this.ignoreCase = ignoreCase; this.needsInputCleaning = ignoreCase; this.needsOutputCleaning = false; // set if we have an OCONV - // hungarian has thousands of AF before the SET, so a 32k buffer is needed - BufferedInputStream buffered = new BufferedInputStream(affix, 32768); - buffered.mark(32768); + // TODO: we really need to probably buffer this on disk since so many newer dictionaries + // (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare + // their encoding... but for now this large buffer is a workaround + BufferedInputStream buffered = new BufferedInputStream(affix, 65536); + buffered.mark(65536); String encoding = getDictionaryEncoding(buffered); buffered.reset(); CharsetDecoder decoder = getJavaEncoding(encoding); From 96bcbefdd403bba362389b927a5f6ef927122d51 Mon Sep 17 00:00:00 2001 From: "Chris M. 
Hostetter" Date: Wed, 5 Mar 2014 01:01:18 +0000 Subject: [PATCH 04/38] SOLR-5815: add some test logging to try and figure out WTF git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574273 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/core/TestNonNRTOpen.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java b/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java index 8a5e493a8cc..b3b851448bf 100644 --- a/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java +++ b/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java @@ -30,8 +30,12 @@ import org.apache.solr.util.RefCounted; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class TestNonNRTOpen extends SolrTestCaseJ4 { - + private static final Logger log = LoggerFactory.getLogger(TestNonNRTOpen.class); + @BeforeClass public static void beforeClass() throws Exception { // use a filesystem, because we need to create an index, then "start up solr" @@ -80,6 +84,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 { // core reload String core = h.getCore().getName(); + log.info("Reloading core: " + h.getCore().toString()); h.getCoreContainer().reload(core); assertNotNRT(1); @@ -90,6 +95,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 { // add a doc and core reload assertU(adoc("bazz", "doc2")); + log.info("Reloading core: " + h.getCore().toString()); h.getCoreContainer().reload(core); assertNotNRT(3); } @@ -127,11 +133,15 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 { } static void assertNotNRT(int maxDoc) { - RefCounted searcher = h.getCore().getSearcher(); + SolrCore core = h.getCore(); + log.info("Checking notNRT & maxDoc=" + maxDoc + " of core=" + core.toString()); + RefCounted searcher = core.getSearcher(); try { - DirectoryReader ir = searcher.get().getIndexReader(); - assertEquals(maxDoc, ir.maxDoc()); - assertFalse("expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt")); + SolrIndexSearcher s = searcher.get(); + DirectoryReader ir = s.getIndexReader(); + assertEquals("SOLR-5815? : wrong maxDoc: core=" + core.toString() +" searcher=" + s.toString(), + maxDoc, ir.maxDoc()); + assertFalse("SOLR-5815? : expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt")); } finally { searcher.decref(); } From 9f701bd30476bafac8d6ec2b5505fb1886541219 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 01:33:35 +0000 Subject: [PATCH 05/38] SOLR-5811: The Overseer will retry work items until success, which is a serious problem if you hit a bad work item. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574280 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../java/org/apache/solr/cloud/Overseer.java | 45 +++- .../org/apache/solr/cloud/ZkController.java | 6 + .../org/apache/solr/cloud/OverseerTest.java | 193 ++++++++++++------ 4 files changed, 178 insertions(+), 69 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 04430677f41..b9b572cc005 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -116,6 +116,9 @@ Bug Fixes * SOLR-5761: HttpSolrServer has a few fields that can be set via setters but are not volatile. (Mark Miller, Gregory Chanan) +* SOLR-5811: The Overseer will retry work items until success, which is a serious + problem if you hit a bad work item. 
(Mark Miller) + Optimizations ---------------------- * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 666c7134d00..8bf202ce8ec 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -123,7 +123,16 @@ public class Overseer { else if (LeaderStatus.YES == isLeader) { final ZkNodeProps message = ZkNodeProps.load(head); final String operation = message.getStr(QUEUE_OPERATION); - clusterState = processMessage(clusterState, message, operation); + try { + clusterState = processMessage(clusterState, message, operation); + } catch (Exception e) { + // generally there is nothing we can do - in most cases, we have + // an issue that will fail again on retry or we cannot communicate with + // ZooKeeper in which case another Overseer should take over + // TODO: if ordering for the message is not important, we could + // track retries and put it back on the end of the queue + log.error("Could not process Overseer message", e); + } zkClient.setData(ZkStateReader.CLUSTER_STATE, ZkStateReader.toJSON(clusterState), true); @@ -189,8 +198,16 @@ public class Overseer { while (head != null) { final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); final String operation = message.getStr(QUEUE_OPERATION); - - clusterState = processMessage(clusterState, message, operation); + try { + clusterState = processMessage(clusterState, message, operation); + } catch (Exception e) { + // generally there is nothing we can do - in most cases, we have + // an issue that will fail again on retry or we cannot communicate with + // ZooKeeper in which case another Overseer should take over + // TODO: if ordering for the message is not important, we could + // track retries and put it back on the end of the queue + log.error("Could not process Overseer message", e); + } workQueue.offer(head.getBytes()); stateUpdateQueue.poll(); @@ -294,6 +311,7 @@ public class Overseer { private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) { log.info("createReplica() {} ", message); String coll = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, coll); String slice = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice sl = clusterState.getSlice(coll, slice); if(sl == null){ @@ -334,6 +352,7 @@ public class Overseer { private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); log.info("Update shard state invoked for collection: " + collection + " with message: " + message); for (String key : message.keySet()) { if (ZkStateReader.COLLECTION_PROP.equals(key)) continue; @@ -358,6 +377,7 @@ public class Overseer { private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKey = message.getStr("routeKey"); String range = message.getStr("range"); @@ -397,8 +417,15 @@ public class Overseer { return clusterState; } + private void checkCollection(ZkNodeProps message, String collection) { + if (collection == null || collection.trim().length() == 0) { + log.error("Skipping invalid Overseer message because it has no collection specified: " 
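// Editor's annotation: as committed here, checkCollection() only logs -- the
// message is not actually skipped, so the "Skipping" wording is ahead of the
// behavior. PATCH 07 below ("SOLR-5811: Additional cleanup.") replaces it with
// a boolean checkCollectionKeyExistence() that each handler uses to return the
// ClusterState unchanged when the collection key is missing.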
+ message); + } + } + private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKeyStr = message.getStr("routeKey"); @@ -424,6 +451,7 @@ public class Overseer { private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice slice = clusterState.getSlice(collection, shardId); if (slice == null) { @@ -470,6 +498,7 @@ public class Overseer { private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); if(collection==null || sliceName == null){ @@ -490,9 +519,7 @@ public class Overseer { */ private ClusterState updateState(ClusterState state, final ZkNodeProps message) { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - assert collection.length() > 0 : message; - - + checkCollection(message, collection); Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null); log.info("Update state numShards={} message={}", numShards, message); @@ -851,9 +878,7 @@ public class Overseer { private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) { final String collection = message.getStr("name"); - -// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy -// newCollections.remove(collection); + checkCollection(message, collection); // ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); return clusterState.copyWith(singletonMap(collection, (DocCollection)null)); @@ -864,6 +889,7 @@ public class Overseer { */ private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate"); @@ -889,6 +915,7 @@ public class Overseer { String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); // final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy // DocCollection coll = newCollections.get(collection); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 27dd8c196e9..ac36d4c5e2f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1064,6 +1064,12 @@ public final class ZkController { final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); final String collection = cd.getCloudDescriptor().getCollectionName(); assert collection != null; + + if (collection == null || collection.trim().length() == 0) { + log.error("No collection was specified."); + return; + } + ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName)); if 
(context != null) { diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java index f63b1f9df99..a67a8be527a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java @@ -64,18 +64,18 @@ public class OverseerTest extends SolrTestCaseJ4 { private List overseers = new ArrayList(); private List readers = new ArrayList(); + private String collection = "collection1"; + public static class MockZKController{ private final SolrZkClient zkClient; private final ZkStateReader zkStateReader; private final String nodeName; - private final String collection; private final LeaderElector elector; private final Map electionContext = Collections.synchronizedMap(new HashMap()); - public MockZKController(String zkAddress, String nodeName, String collection) throws InterruptedException, TimeoutException, IOException, KeeperException { + public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException { this.nodeName = nodeName; - this.collection = collection; zkClient = new SolrZkClient(zkAddress, TIMEOUT); zkStateReader = new ZkStateReader(zkClient); zkStateReader.createClusterStateWatchersAndUpdate(); @@ -105,7 +105,7 @@ public class OverseerTest extends SolrTestCaseJ4 { zkClient.close(); } - public String publishState(String coreName, String coreNodeName, String stateName, int numShards) + public String publishState(String collection, String coreName, String coreNodeName, String stateName, int numShards) throws KeeperException, InterruptedException, IOException { if (stateName == null) { ElectionContext ec = electionContext.remove(coreName); @@ -134,41 +134,40 @@ public class OverseerTest extends SolrTestCaseJ4 { q.offer(ZkStateReader.toJSON(m)); } - for (int i = 0; i < 120; i++) { - String shardId = getShardId("http://" + nodeName + "/solr/", coreName); - if (shardId != null) { - try { - zkClient.makePath("/collections/" + collection + "/leader_elect/" - + shardId + "/election", true); - } catch (NodeExistsException nee) {} - ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, - "http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP, - nodeName, ZkStateReader.CORE_NAME_PROP, coreName, - ZkStateReader.SHARD_ID_PROP, shardId, - ZkStateReader.COLLECTION_PROP, collection, - ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); - ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase( - elector, shardId, collection, nodeName + "_" + coreName, props, - zkStateReader); - elector.setup(ctx); - elector.joinElection(ctx, false); - return shardId; + if (collection.length() > 0) { + for (int i = 0; i < 120; i++) { + String shardId = getShardId(collection, coreNodeName); + if (shardId != null) { + try { + zkClient.makePath("/collections/" + collection + "/leader_elect/" + + shardId + "/election", true); + } catch (NodeExistsException nee) {} + ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, + "http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP, + nodeName, ZkStateReader.CORE_NAME_PROP, coreName, + ZkStateReader.SHARD_ID_PROP, shardId, + ZkStateReader.COLLECTION_PROP, collection, + ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); + ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase( + elector, shardId, collection, nodeName + "_" + coreName, props, + zkStateReader); + elector.setup(ctx); + elector.joinElection(ctx, 
false); + return shardId; + } + Thread.sleep(500); } - Thread.sleep(500); } return null; } - private String getShardId(final String baseUrl, final String coreName) { - Map slices = zkStateReader.getClusterState().getSlicesMap( - collection); + private String getShardId(String collection, String coreNodeName) { + Map slices = zkStateReader.getClusterState().getSlicesMap(collection); if (slices != null) { for (Slice slice : slices.values()) { for (Replica replica : slice.getReplicas()) { - // TODO: for really large clusters, we could 'index' on this - String rbaseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP); - String rcore = replica.getStr(ZkStateReader.CORE_NAME_PROP); - if (baseUrl.equals(rbaseUrl) && coreName.equals(rcore)) { + String cnn = replica.getName(); + if (coreNodeName.equals(cnn)) { return slice.getName(); } } @@ -226,17 +225,17 @@ public class OverseerTest extends SolrTestCaseJ4 { ZkStateReader reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - zkController = new MockZKController(server.getZkAddress(), "127.0.0.1", "collection1"); + zkController = new MockZKController(server.getZkAddress(), "127.0.0.1"); final int numShards=6; for (int i = 0; i < numShards; i++) { - assertNotNull("shard got no id?", zkController.publishState("core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); + assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); } - - assertEquals(2, reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap().size()); - assertEquals(2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size()); - assertEquals(2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size()); + Map rmap = reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap(); + assertEquals(rmap.toString(), 2, rmap.size()); + assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size()); + assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size()); //make sure leaders are in cloud state assertNotNull(reader.getLeaderUrl("collection1", "shard1", 15000)); @@ -258,6 +257,81 @@ public class OverseerTest extends SolrTestCaseJ4 { } } + @Test + public void testBadQueueItem() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + + ZkTestServer server = new ZkTestServer(zkDir); + + MockZKController zkController = null; + SolrZkClient zkClient = null; + SolrZkClient overseerClient = null; + + try { + server.run(); + AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost()); + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); + zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true); + + overseerClient = electNewOverseer(server.getZkAddress()); + + ZkStateReader reader = new ZkStateReader(zkClient); + reader.createClusterStateWatchersAndUpdate(); + + zkController = new MockZKController(server.getZkAddress(), "127.0.0.1"); + + final int numShards=3; + + for (int i = 0; i < numShards; i++) { + assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); + } + + assertEquals(1, reader.getClusterState().getSlice(collection, "shard1").getReplicasMap().size()); + assertEquals(1, 
reader.getClusterState().getSlice(collection, "shard2").getReplicasMap().size()); + assertEquals(1, reader.getClusterState().getSlice(collection, "shard3").getReplicasMap().size()); + + //make sure leaders are in cloud state + assertNotNull(reader.getLeaderUrl(collection, "shard1", 15000)); + assertNotNull(reader.getLeaderUrl(collection, "shard2", 15000)); + assertNotNull(reader.getLeaderUrl(collection, "shard3", 15000)); + + // publish a bad queue item + String emptyCollectionName = ""; + zkController.publishState(emptyCollectionName, "core0", "node0", ZkStateReader.ACTIVE, 1); + zkController.publishState(emptyCollectionName, "core0", "node0", null, 1); + + // make sure the Overseer is still processing items + for (int i = 0; i < numShards; i++) { + assertNotNull("shard got no id?", zkController.publishState("collection2", "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); + } + + assertEquals(1, reader.getClusterState().getSlice("collection2", "shard1").getReplicasMap().size()); + assertEquals(1, reader.getClusterState().getSlice("collection2", "shard2").getReplicasMap().size()); + assertEquals(1, reader.getClusterState().getSlice("collection2", "shard3").getReplicasMap().size()); + + //make sure leaders are in cloud state + assertNotNull(reader.getLeaderUrl("collection2", "shard1", 15000)); + assertNotNull(reader.getLeaderUrl("collection2", "shard2", 15000)); + assertNotNull(reader.getLeaderUrl("collection2", "shard3", 15000)); + + } finally { + if (DEBUG) { + if (zkController != null) { + zkClient.printLayoutToStdOut(); + } + } + close(zkClient); + if (zkController != null) { + zkController.close(); + } + close(overseerClient); + server.shutdown(); + } + } + @Test public void testShardAssignmentBigger() throws Exception { String zkDir = dataDir.getAbsolutePath() + File.separator @@ -289,7 +363,7 @@ public class OverseerTest extends SolrTestCaseJ4 { reader.createClusterStateWatchersAndUpdate(); for (int i = 0; i < nodeCount; i++) { - controllers[i] = new MockZKController(server.getZkAddress(), "node" + i, "collection1"); + controllers[i] = new MockZKController(server.getZkAddress(), "node" + i); } for (int i = 0; i < nodeCount; i++) { nodeExecutors[i] = Executors.newFixedThreadPool(1, new DefaultSolrThreadFactory("testShardAssignment")); @@ -306,7 +380,7 @@ public class OverseerTest extends SolrTestCaseJ4 { final String coreName = "core" + slot; try { - ids[slot]=controllers[slot % nodeCount].publishState(coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount); + ids[slot]=controllers[slot % nodeCount].publishState(collection, coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount); } catch (Throwable e) { e.printStackTrace(); fail("register threw exception:" + e.getClass()); @@ -551,21 +625,20 @@ public class OverseerTest extends SolrTestCaseJ4 { reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - mockController = new MockZKController(server.getZkAddress(), "node1", - "collection1"); + mockController = new MockZKController(server.getZkAddress(), "node1"); overseerClient = electNewOverseer(server.getZkAddress()); Thread.sleep(1000); - mockController.publishState("core1", "core_node1", + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); - waitForCollections(reader, "collection1"); + waitForCollections(reader, collection); verifyStatus(reader, ZkStateReader.RECOVERING); int version = getClusterStateVersion(zkClient); - mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE, + 
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.ACTIVE, 1); while (version == getClusterStateVersion(zkClient)); @@ -575,7 +648,7 @@ public class OverseerTest extends SolrTestCaseJ4 { overseerClient.close(); Thread.sleep(1000); // wait for overseer to get killed - mockController.publishState("core1", "core_node1", + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); version = getClusterStateVersion(zkClient); @@ -588,13 +661,13 @@ public class OverseerTest extends SolrTestCaseJ4 { assertEquals("Live nodes count does not match", 1, reader .getClusterState().getLiveNodes().size()); assertEquals("Shard count does not match", 1, reader.getClusterState() - .getSlice("collection1", "shard1").getReplicasMap().size()); + .getSlice(collection, "shard1").getReplicasMap().size()); version = getClusterStateVersion(zkClient); - mockController.publishState("core1", "core_node1", null, 1); + mockController.publishState(collection, "core1", "core_node1", null, 1); while (version == getClusterStateVersion(zkClient)); Thread.sleep(500); assertFalse("collection1 should be gone after publishing the null state", - reader.getClusterState().getCollections().contains("collection1")); + reader.getClusterState().getCollections().contains(collection)); } finally { close(mockController); close(overseerClient); @@ -676,17 +749,17 @@ public class OverseerTest extends SolrTestCaseJ4 { for (int i = 0; i < atLeast(4); i++) { killCounter.incrementAndGet(); //for each round allow 1 kill - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); - mockController.publishState("core1", "node1", "state1",1); + mockController = new MockZKController(server.getZkAddress(), "node1"); + mockController.publishState(collection, "core1", "node1", "state1",1); if(mockController2!=null) { mockController2.close(); mockController2 = null; } - mockController.publishState("core1", "node1","state2",1); - mockController2 = new MockZKController(server.getZkAddress(), "node2", "collection1"); - mockController.publishState("core1", "node1", "state1",1); + mockController.publishState(collection, "core1", "node1","state2",1); + mockController2 = new MockZKController(server.getZkAddress(), "node2"); + mockController.publishState(collection, "core1", "node1", "state1",1); verifyShardLeader(reader, "collection1", "shard1", "core1"); - mockController2.publishState("core4", "node2", "state2" ,1); + mockController2.publishState(collection, "core4", "node2", "state2" ,1); mockController.close(); mockController = null; verifyShardLeader(reader, "collection1", "shard1", "core4"); @@ -729,11 +802,11 @@ public class OverseerTest extends SolrTestCaseJ4 { reader = new ZkStateReader(controllerClient); reader.createClusterStateWatchersAndUpdate(); - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); + mockController = new MockZKController(server.getZkAddress(), "node1"); overseerClient = electNewOverseer(server.getZkAddress()); - mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1); + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); waitForCollections(reader, "collection1"); @@ -743,8 +816,8 @@ public class OverseerTest extends SolrTestCaseJ4 { int version = getClusterStateVersion(controllerClient); - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); - mockController.publishState("core1", "core_node1", 
ZkStateReader.RECOVERING, 1); + mockController = new MockZKController(server.getZkAddress(), "node1"); + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); while (version == getClusterStateVersion(controllerClient)); @@ -794,11 +867,11 @@ public class OverseerTest extends SolrTestCaseJ4 { reader = new ZkStateReader(controllerClient); reader.createClusterStateWatchersAndUpdate(); - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); + mockController = new MockZKController(server.getZkAddress(), "node1"); overseerClient = electNewOverseer(server.getZkAddress()); - mockController.publishState("core1", "node1", ZkStateReader.RECOVERING, 12); + mockController.publishState(collection, "core1", "node1", ZkStateReader.RECOVERING, 12); waitForCollections(reader, "collection1"); From 3e2a81753955e8e40460b0ac3579dc0b6140d8af Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 17:20:36 +0000 Subject: [PATCH 06/38] Fix javadoc spelling. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574577 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/solr/common/cloud/ZkStateReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 4b608943fb2..12dc700c896 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -626,7 +626,7 @@ public class ZkStateReader { } /** - * Returns the baseURL corrisponding to a given node's nodeName -- + * Returns the baseURL corresponding to a given node's nodeName -- * NOTE: does not (currently) imply that the nodeName (or resulting * baseURL) exists in the cluster. * @lucene.experimental From e88091b3ddf90800cbfc2f71587774dfea57881d Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 17:28:31 +0000 Subject: [PATCH 07/38] SOLR-5811: Additional cleanup. 
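[Editor's note: a minimal sketch of the guard pattern this cleanup settles on,
illustrative only -- the real handlers are in the diff below. Each state-mutating
handler validates the message before touching cluster state:

    if (!checkCollectionKeyExistence(message)) return clusterState; // drop bad item, state untouched

Combined with the try/catch added around processMessage() in PATCH 05, a
malformed work item is now dequeued and dropped instead of being retried
forever.]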
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574580 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/solr/cloud/Overseer.java | 69 +++++++++---------- .../org/apache/solr/cloud/ZkController.java | 16 ++--- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 8bf202ce8ec..1b4f38c2c8e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -311,7 +311,7 @@ public class Overseer { private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) { log.info("createReplica() {} ", message); String coll = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, coll); + if (!checkCollectionKeyExistence(message)) return clusterState; String slice = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice sl = clusterState.getSlice(coll, slice); if(sl == null){ @@ -352,7 +352,7 @@ public class Overseer { private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; log.info("Update shard state invoked for collection: " + collection + " with message: " + message); for (String key : message.keySet()) { if (ZkStateReader.COLLECTION_PROP.equals(key)) continue; @@ -377,7 +377,7 @@ public class Overseer { private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKey = message.getStr("routeKey"); String range = message.getStr("range"); @@ -417,15 +417,22 @@ public class Overseer { return clusterState; } - private void checkCollection(ZkNodeProps message, String collection) { - if (collection == null || collection.trim().length() == 0) { - log.error("Skipping invalid Overseer message because it has no collection specified: " + message); + private boolean checkCollectionKeyExistence(ZkNodeProps message) { + return checkKeyExistence(message, ZkStateReader.COLLECTION_PROP); + } + + private boolean checkKeyExistence(ZkNodeProps message, String key) { + String value = message.getStr(key); + if (value == null || value.trim().length() == 0) { + log.error("Skipping invalid Overseer message because it has no " + key + " specified: " + message); + return false; } + return true; } private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKeyStr = message.getStr("routeKey"); @@ -451,7 +458,7 @@ public class Overseer { private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice slice = clusterState.getSlice(collection, shardId); if (slice == null) { @@ -498,7 +505,7 
@@ public class Overseer { private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); if(collection==null || sliceName == null){ @@ -517,30 +524,30 @@ public class Overseer { /** * Try to assign core to the cluster. */ - private ClusterState updateState(ClusterState state, final ZkNodeProps message) { + private ClusterState updateState(ClusterState clusterState, final ZkNodeProps message) { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null); log.info("Update state numShards={} message={}", numShards, message); List shardNames = new ArrayList(); //collection does not yet exist, create placeholders if num shards is specified - boolean collectionExists = state.hasCollection(collection); + boolean collectionExists = clusterState.hasCollection(collection); if (!collectionExists && numShards!=null) { getShardNames(numShards, shardNames); - state = createCollection(state, collection, shardNames, message); + clusterState = createCollection(clusterState, collection, shardNames, message); } String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); if (coreNodeName == null) { - coreNodeName = getAssignedCoreNodeName(state, message); + coreNodeName = getAssignedCoreNodeName(clusterState, message); if (coreNodeName != null) { log.info("node=" + coreNodeName + " is already registered"); } else { // if coreNodeName is null, auto assign one - coreNodeName = Assign.assignNode(collection, state); + coreNodeName = Assign.assignNode(collection, clusterState); } message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); @@ -549,7 +556,7 @@ public class Overseer { // use the provided non null shardId if (sliceName == null) { //get shardId from ClusterState - sliceName = getAssignedId(state, coreNodeName, message); + sliceName = getAssignedId(clusterState, coreNodeName, message); if (sliceName != null) { log.info("shard=" + sliceName + " is already registered"); } @@ -558,14 +565,14 @@ public class Overseer { //request new shardId if (collectionExists) { // use existing numShards - numShards = state.getCollection(collection).getSlices().size(); + numShards = clusterState.getCollection(collection).getSlices().size(); log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards); } - sliceName = Assign.assignShard(collection, state, numShards); + sliceName = Assign.assignShard(collection, clusterState, numShards); log.info("Assigning new node to shard shard=" + sliceName); } - Slice slice = state.getSlice(collection, sliceName); + Slice slice = clusterState.getSlice(collection, sliceName); Map replicaProps = new LinkedHashMap(); @@ -611,9 +618,9 @@ public class Overseer { Map replicas; if (slice != null) { - state = checkAndCompleteShardSplit(state, collection, coreNodeName, sliceName, replicaProps); + clusterState = checkAndCompleteShardSplit(clusterState, collection, coreNodeName, sliceName, replicaProps); // get the current slice again because it may have been updated due to checkAndCompleteShardSplit method - slice = 
state.getSlice(collection, sliceName); + slice = clusterState.getSlice(collection, sliceName); sliceProps = slice.getProperties(); replicas = slice.getReplicasCopy(); } else { @@ -627,7 +634,7 @@ public class Overseer { replicas.put(replica.getName(), replica); slice = new Slice(sliceName, replicas, sliceProps); - ClusterState newClusterState = updateSlice(state, collection, slice); + ClusterState newClusterState = updateSlice(clusterState, collection, slice); return newClusterState; } @@ -876,11 +883,9 @@ public class Overseer { * Remove collection from cloudstate */ private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) { - final String collection = message.getStr("name"); - checkCollection(message, collection); + if (!checkKeyExistence(message, "name")) return clusterState; -// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); return clusterState.copyWith(singletonMap(collection, (DocCollection)null)); } @@ -888,34 +893,28 @@ public class Overseer { * Remove collection slice from cloudstate */ private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) { - final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); + final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + if (!checkCollectionKeyExistence(message)) return clusterState; log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate"); -// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy DocCollection coll = clusterState.getCollection(collection); Map newSlices = new LinkedHashMap(coll.getSlicesMap()); newSlices.remove(sliceId); DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter()); -// newCollections.put(newCollection.getName(), newCollection); return newState(clusterState, singletonMap(collection,newCollection)); - -// return new ClusterState(clusterState.getLiveNodes(), newCollections); } /* * Remove core from cloudstate */ private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) { - - String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); - + final String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; // final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy // DocCollection coll = newCollections.get(collection); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index ac36d4c5e2f..cee27041728 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1013,7 +1013,8 @@ public final class ZkController { core.close(); } } - log.info("publishing core={} state={}", cd.getName(), state); + String collection = cd.getCloudDescriptor().getCollectionName(); + log.info("publishing core={} state={} collection={}", cd.getName(), state, collection); //System.out.println(Thread.currentThread().getStackTrace()[3]); Integer numShards = cd.getCloudDescriptor().getNumShards(); if (numShards == null) { //XXX sys prop hack @@ -1021,8 +1022,7 
@@ public final class ZkController { numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP); } - assert cd.getCloudDescriptor().getCollectionName() != null && cd.getCloudDescriptor() - .getCollectionName().length() > 0; + assert collection != null && collection.length() > 0; String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); //assert cd.getCloudDescriptor().getShardId() != null; @@ -1033,12 +1033,9 @@ public final class ZkController { ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(), ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(), - ZkStateReader.COLLECTION_PROP, cd.getCloudDescriptor() - .getCollectionName(), - ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() - : null, - ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName - : null); + ZkStateReader.COLLECTION_PROP, collection, + ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() : null, + ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName : null); if (updateLastState) { cd.getCloudDescriptor().lastPublished = state; } @@ -1368,7 +1365,6 @@ public final class ZkController { CloudDescriptor cloudDesc = cd.getCloudDescriptor(); - // make sure the node name is set on the descriptor if (cloudDesc.getCoreNodeName() == null) { cloudDesc.setCoreNodeName(coreNodeName); From af101b305706ffbb1272281490e1d18fcd86c2db Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 17:48:27 +0000 Subject: [PATCH 08/38] SOLR-5813: tests for "" or null collection name - should default to core name. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574589 13f79535-47bb-0310-9956-ffa450edef68 --- .../CollectionsAPIDistributedZkTest.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java index 310c97d7ffa..e257b5e66ca 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java @@ -203,14 +203,14 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa testCollectionsAPI(); testCollectionsAPIAddRemoveStress(); testErrorHandling(); + testNoCollectionSpecified(); deletePartiallyCreatedCollection(); deleteCollectionRemovesStaleZkCollectionsNode(); clusterPropTest(); - addReplicaTest(); - // last deleteCollectionWithDownNodes(); + if (DEBUG) { super.printLayout(); } @@ -578,6 +578,40 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa String val2 = failure.getVal(0).toString(); assertTrue(val1.contains("SolrException") || val2.contains("SolrException")); } + + private void testNoCollectionSpecified() throws Exception { + + cloudClient.getZkStateReader().updateClusterState(true); + assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection")); + assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2")); + + // try and create a SolrCore with no collection name + Create createCmd = new Create(); + createCmd.setCoreName("corewithnocollection"); + createCmd.setCollection(""); + String dataDir = SolrTestCaseJ4.dataDir.getAbsolutePath() + File.separator + + System.currentTimeMillis() + "corewithnocollection" + "_1v"; + 
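// Editor's annotation: this test drives core creation twice, once with
// setCollection("") and once with setCollection(null); per SOLR-5813 both
// cases are expected to default the collection to the core name, which the
// assertions at the end of the method check against the cluster state.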
createCmd.setDataDir(dataDir); + createCmd.setNumShards(1); + if (secondConfigSet) { + createCmd.setCollectionConfigName("conf1"); + } + + createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1))) + .request(createCmd); + + // try and create a SolrCore with no collection name + createCmd.setCollection(null); + createCmd.setCoreName("corewithnocollection2"); + + createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1))) + .request(createCmd); + + // in both cases, the collection should have default to the core name + cloudClient.getZkStateReader().updateClusterState(true); + assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection")); + assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2")); + } private void testNodesUsedByCreate() throws Exception { // we can use this client because we just want base url From f54178970e5ab2d6a77ace8e45f735b2168e2c87 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Wed, 5 Mar 2014 18:05:25 +0000 Subject: [PATCH 09/38] LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574595 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../lucene/index/DocFieldProcessor.java | 5 - .../lucene/index/DocInverterPerField.java | 12 +- .../org/apache/lucene/index/IndexWriter.java | 7 +- .../lucene/index/TermsHashPerField.java | 11 +- .../lucene/index/TestExceedMaxTermLength.java | 105 ++++++++++++ .../apache/lucene/index/TestIndexWriter.java | 26 +-- solr/CHANGES.txt | 16 +- .../solr/collection1/conf/schema11.xml | 13 ++ .../solr/update/TestExceedMaxTermLength.java | 153 ++++++++++++++++++ 10 files changed, 326 insertions(+), 29 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java create mode 100644 solr/core/src/test/org/apache/solr/update/TestExceedMaxTermLength.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index cca653dc97a..5f4511a6d8c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -68,6 +68,13 @@ Optimizations ======================= Lucene 4.8.0 ======================= +Changes in Runtime Behavior + +* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException + if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous + behavior of silently ignoring these terms, use LengthFilter in your Analyzer. + (hossman, Mike McCandless, Varun Thacker) + New Features * LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting diff --git a/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java index c35b914fc47..23c60a09c25 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -209,11 +209,6 @@ final class DocFieldProcessor extends DocConsumer { final DocFieldProcessorPerField perField = fields[i]; perField.consumer.processFields(perField.fields, perField.fieldCount); } - - if (docState.maxTermPrefix != null && docState.infoStream.isEnabled("IW")) { - docState.infoStream.message("IW", "WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. 
Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); - docState.maxTermPrefix = null; - } } private DocFieldProcessorPerField processField(FieldInfos.Builder fieldInfos, diff --git a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java index df21f211b24..39167f4c79c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.util.IOUtils; /** * Holds state for inverting all occurrences of a single @@ -182,6 +181,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField { // when we come back around to the field... fieldState.position += posIncrAttribute.getPositionIncrement(); fieldState.offset += offsetAttribute.endOffset(); + + + if (docState.maxTermPrefix != null) { + final String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"; + if (docState.infoStream.isEnabled("IW")) { + docState.infoStream.message("IW", "ERROR: " + msg); + } + docState.maxTermPrefix = null; + throw new IllegalArgumentException(msg); + } + /* if success was false above there is an exception coming through and we won't get here.*/ succeededInProcessingField = true; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 0a792b8d1dd..ce54da7db6d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -207,8 +207,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ /** * Absolute hard maximum length for a term, in bytes once * encoded as UTF8. If a term arrives from the analyzer - * longer than this length, it is skipped and a message is - * printed to infoStream, if set (see {@link + * longer than this length, an + * IllegalArgumentException is thrown + * and a message is printed to infoStream, if set (see {@link * IndexWriterConfig#setInfoStream(InfoStream)}). */ public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; @@ -1159,7 +1160,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ * merge policy. * *
   * <p>Note that each term in the document can be no longer
-  * than 16383 characters, otherwise an
+  * than {@link #MAX_TERM_LENGTH} in bytes, otherwise an
   * IllegalArgumentException will be thrown.</p>
   *
   * <p>
Note that it's possible to create an invalid Unicode diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java index bb67d642c3b..aa4fcba2647 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -179,12 +179,11 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { try { termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef()); } catch (MaxBytesLengthExceededException e) { - // Not enough room in current block - // Just skip this term, to remain as robust as - // possible during indexing. A TokenFilter - // can be inserted into the analyzer chain if - // other behavior is wanted (pruning the term - // to a prefix, throwing an exception, etc). + // Term is too large; record this here (can't throw an + // exc because DocInverterPerField will then abort the + // entire segment) and then throw an exc later in + // DocInverterPerField.java. LengthFilter can always be + // used to prune the term before indexing: if (docState.maxTermPrefix == null) { final int saved = termBytesRef.length; try { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java b/lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java new file mode 100644 index 00000000000..ec8ea99d753 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java @@ -0,0 +1,105 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import java.io.IOException; + +import org.junit.Before; +import org.junit.After; + +/** + * Tests that a useful exception is thrown when attempting to index a term that is + * too large + * + * @see IndexWriter#MAX_TERM_LENGTH + */ +public class TestExceedMaxTermLength extends LuceneTestCase { + + private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1; + private final static int maxTestTermLegnth = IndexWriter.MAX_TERM_LENGTH * 2; + + Directory dir = null; + + @Before + public void createDir() { + dir = newDirectory(); + } + @After + public void destroyDir() throws IOException { + dir.close(); + dir = null; + } + + public void test() throws Exception { + + IndexWriter w = new IndexWriter + (dir, newIndexWriterConfig(random(), + TEST_VERSION_CURRENT, + new MockAnalyzer(random()))); + try { + final FieldType ft = new FieldType(); + ft.setIndexed(true); + ft.setStored(random().nextBoolean()); + ft.freeze(); + + final Document doc = new Document(); + if (random().nextBoolean()) { + // totally ok short field value + doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10), + TestUtil.randomSimpleString(random(), 1, 10), + ft)); + } + // problematic field + final String name = TestUtil.randomSimpleString(random(), 1, 50); + final String value = TestUtil.randomSimpleString(random(), + minTestTermLength, + maxTestTermLegnth); + final Field f = new Field(name, value, ft); + if (random().nextBoolean()) { + // totally ok short field value + doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10), + TestUtil.randomSimpleString(random(), 1, 10), + ft)); + } + doc.add(f); + + try { + w.addDocument(doc); + fail("Did not get an exception from adding a monster term"); + } catch (IllegalArgumentException e) { + final String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH); + final String msg = e.getMessage(); + assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg, + msg.contains("immense term")); + assertTrue("IllegalArgumentException didn't mention max length ("+maxLengthMsg+"): " + msg, + msg.contains(maxLengthMsg)); + assertTrue("IllegalArgumentException didn't mention field name ("+name+"): " + msg, + msg.contains(name)); + } + } finally { + w.close(); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index cf8a1ba5b16..3284dde2782 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1660,32 +1660,32 @@ public class TestIndexWriter extends LuceneTestCase { // This contents produces a too-long term: String contents = "abc xyz x" + bigTerm + " another term"; doc.add(new TextField("content", contents, Field.Store.NO)); - w.addDocument(doc); + try { + w.addDocument(doc); + fail("should have hit exception"); + } catch (IllegalArgumentException iae) { + // expected + } // Make sure we can add another normal document doc = new Document(); doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO)); w.addDocument(doc); + // So we remove the deleted doc: + w.forceMerge(1); + IndexReader reader = w.getReader(); 
w.close(); // Make sure all terms < max size were indexed - assertEquals(2, reader.docFreq(new Term("content", "abc"))); + assertEquals(1, reader.docFreq(new Term("content", "abc"))); assertEquals(1, reader.docFreq(new Term("content", "bbb"))); - assertEquals(1, reader.docFreq(new Term("content", "term"))); - assertEquals(1, reader.docFreq(new Term("content", "another"))); + assertEquals(0, reader.docFreq(new Term("content", "term"))); - // Make sure position is still incremented when - // massive term is skipped: - DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another")); - assertEquals(0, tps.nextDoc()); - assertEquals(1, tps.freq()); - assertEquals(3, tps.nextPosition()); - - // Make sure the doc that has the massive term is in + // Make sure the doc that has the massive term is NOT in // the index: - assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); + assertEquals("document with wicked long term is in the index!", 1, reader.numDocs()); reader.close(); dir.close(); diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b9b572cc005..63d84a170c4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -76,7 +76,16 @@ Velocity 1.7 and Velocity Tools 2.0 Apache UIMA 2.3.1 Apache ZooKeeper 3.4.5 - +Upgrading from Solr 4.7 +---------------------- + +* In previous versions of Solr, Terms that exceeded Lucene's MAX_TERM_LENGTH were + silently ignored when indexing documents. Begining with Solr 4.8, a document + an error will be generated when attempting to index a document with a term + that is too large. If you wish to continue to have large terms ignored, + use "solr.LengthFilterFactory" in all of your Analyzers. See LUCENE-5472 for + more details. + Detailed Change List ---------------------- @@ -154,6 +163,11 @@ Other Changes registration exists, wait a short time to see if it goes away. (Mark Miller) +* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException + if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous + behavior of silently ignoring these terms, use LengthFilter in your Analyzer. + (hossman, Mike McCandless, Varun Thacker) + ================== 4.7.0 ================== Versions of Major Components diff --git a/solr/core/src/test-files/solr/collection1/conf/schema11.xml b/solr/core/src/test-files/solr/collection1/conf/schema11.xml index a993cbd6f61..ea4edd553aa 100755 --- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema11.xml @@ -287,6 +287,16 @@ valued. --> class="solr.ExternalFileField"/> + + + + + + + + + + @@ -324,6 +334,9 @@ valued. --> + + + -
-Provides index sorting capablities. The application can use one of the
-pre-existing Sorter implementations, e.g. to sort by a
-{@link org.apache.lucene.index.sorter.NumericDocValuesSorter}
-or {@link org.apache.lucene.index.sorter.Sorter#REVERSE_DOCS reverse} the order
-of the documents. Additionally, the application can implement a custom
-{@link org.apache.lucene.index.sorter.Sorter} which returns a permutation on
-a source {@link org.apache.lucene.index.AtomicReader}'s document IDs, to sort
-the input documents by additional criteria.
+<p>
+Provides index sorting capabilities. The application can use any
+Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
+reverse the order of the documents (by using SortField.Type.DOC in reverse).
+Multi-level sorts can be specified the same way you would when searching, by
+building Sort from multiple SortFields.
 
 <p>
 {@link org.apache.lucene.index.sorter.SortingMergePolicy} can be used to make
 Lucene sort segments before merging them. This will ensure that every segment
 resulting from a merge will be sorted according to the provided
-{@link org.apache.lucene.index.sorter.Sorter}. This however makes merging and
+{@link org.apache.lucene.search.Sort}. This however makes merging and
 thus indexing slower.
 
 <p>
Sorted segments allow for early query termination when the sort order From 58198c299cd88fecacf3bfdbfbfb7f4aef7694b7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 6 Mar 2014 17:03:56 +0000 Subject: [PATCH 27/38] LUCENE-5493: fix precommit git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574962 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/sorter/BlockJoinComparatorSource.java | 15 +++++++++++---- .../lucene/index/sorter/SortingAtomicReader.java | 10 +++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java index c2a2a476b28..3029bcab656 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java @@ -31,7 +31,16 @@ import org.apache.lucene.util.FixedBitSet; /** * Helper class to sort readers that contain blocks of documents. + *
+ * Note that this currently has some limitations:
+ * <ul>
+ *   <li>Cannot yet be used with IndexSearcher.searchAfter
+ *   <li>Filling sort value fields is not yet supported.
+ * </ul>
+ * Its intended to be used with {@link SortingMergePolicy}. */ +// TODO: can/should we clean this thing up (e.g. return a proper sort value) +// and move to the join/ module? public class BlockJoinComparatorSource extends FieldComparatorSource { final Filter parentsFilter; final Sort parentSort; @@ -84,8 +93,8 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { childComparators[i] = childFields[i].getComparator(1, i); } - // NOTE: not quite right i guess, really our sort "value" is more complex... - // but at the moment you really should only use this at indexing time. + // NOTE: we could return parent ID as value but really our sort "value" is more complex... + // So we throw UOE for now. At the moment you really should only use this at indexing time. return new FieldComparator() { int bottomParent; int bottomChild; @@ -171,7 +180,6 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { int compare(int docID1, int parent1, int docID2, int parent2) throws IOException { if (parent1 == parent2) { // both are in the same block - // nocommit: should not be needed? if (docID1 == parent1 || docID2 == parent2) { // keep parents at the end of blocks return docID1 - docID2; @@ -180,7 +188,6 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { } } else { int cmp = compare(parent1, parent2, parentComparators, parentReverseMul); - // nocommit: should not be needed? if (cmp == 0) { return parent1 - parent2; } else { diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java index 55693434459..1ecde39d9e2 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java @@ -49,13 +49,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; /** * An {@link AtomicReader} which supports sorting documents by a given - * {@link Sorter}. You can use this class to sort an index as follows: + * {@link Sort}. You can use this class to sort an index as follows: * *
  * IndexWriter writer; // writer to which the sorted index will be added
  * DirectoryReader reader; // reader on the input index
- * Sorter sorter; // determines how the documents are sorted
- * AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
+ * Sort sort; // determines how the documents are sorted
+ * AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
  * writer.addIndexes(sortingReader);
  * writer.close();
  * reader.close();
@@ -481,7 +481,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
   static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
     
     /**
-     * A {@link Sorter} which sorts two parallel arrays of doc IDs and
+     * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
      * offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
      * is swapped too.
      */
@@ -709,7 +709,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
   }
 
   /** Return a sorted view of reader according to the order
-   *  defined by sorter. If the reader is already sorted, this
+   *  defined by sort. If the reader is already sorted, this
    *  method might return the reader as-is. */
   public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
     return wrap(reader, new Sorter(sort).sort(reader));

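[Editor's note: a self-contained sketch of the wrap-and-addIndexes usage from the
javadoc above, under the new Sort-based API. The paths, field name, analyzer choice,
and Version constant are illustrative assumptions, not part of this patch.]

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.SlowCompositeReaderWrapper;
    import org.apache.lucene.index.sorter.SortingAtomicReader;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class SortIndexExample {
      public static void main(String[] args) throws IOException {
        // Sort by a long field "weight", descending.
        Sort sort = new Sort(new SortField("weight", SortField.Type.LONG, true));
        Directory srcDir = FSDirectory.open(new File("/path/to/unsorted-index"));
        Directory dstDir = FSDirectory.open(new File("/path/to/sorted-index"));
        DirectoryReader reader = DirectoryReader.open(srcDir);
        AtomicReader sortingReader =
            SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
        IndexWriter writer = new IndexWriter(dstDir, new IndexWriterConfig(
            Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48)));
        // Add the sorted view; adding the raw reader would leave the copy unsorted.
        writer.addIndexes(sortingReader);
        writer.close();
        reader.close();
        srcDir.close();
        dstDir.close();
      }
    }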
From 57569ed1aaa91bdf693bf4fea8e9ff7ae96d1b0e Mon Sep 17 00:00:00 2001
From: Michael McCandless 
Date: Thu, 6 Mar 2014 17:11:46 +0000
Subject: [PATCH 28/38] LUCENE-5493: don't do forceMerge on initial build of
 AnalyzingInfixSuggester

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574965 13f79535-47bb-0310-9956-ffa450edef68
---
 .../analyzing/AnalyzingInfixSuggester.java    |  75 +++-------
 .../analyzing/BlendedInfixSuggester.java      |  10 +-
 .../search/suggest/LookupBenchmarkTest.java   |   3 +-
 .../AnalyzingInfixSuggesterTest.java          | 131 +++---------------
 .../analyzing/BlendedInfixSuggesterTest.java  |  59 ++------
 5 files changed, 60 insertions(+), 218 deletions(-)

diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index 1f72b2b0cbf..df3aa04b859 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -46,15 +46,12 @@ import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FilterAtomicReader;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
-import org.apache.lucene.index.sorter.SortingAtomicReader;
 import org.apache.lucene.index.sorter.SortingMergePolicy;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -115,9 +112,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   /** Analyzer used at index time */
   protected final Analyzer indexAnalyzer;
   final Version matchVersion;
-  private final File indexPath;
+  private final Directory dir;
   final int minPrefixChars;
-  private Directory dir;
 
   /** Used for ongoing NRT additions/updates. */
   private IndexWriter writer;
@@ -133,13 +129,15 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
 
   /** Create a new instance, loading from a previously built
-   *  directory, if it exists. */
-  public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
-    this(matchVersion, indexPath, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
+   *  directory, if it exists.  Note that {@link #close}
+   *  will also close the provided directory. */
+  public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
+    this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
   }
 
   /** Create a new instance, loading from a previously built
-   *  directory, if it exists.
+   *  directory, if it exists. Note that {@link #close}
+   *  will also close the provided directory.
    *
    *  @param minPrefixChars Minimum number of leading characters
    *     before PrefixQuery is used (default 4).
@@ -147,7 +145,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    *     ngrams (increasing index size but making lookups
    *     faster).
    */
-  public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
+  public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
 
     if (minPrefixChars < 0) {
       throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
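[Editor's note: a minimal sketch of the new Directory-based construction; the index
path and inputs are hypothetical, and the analyzer choice is illustrative.]

    Analyzer a = new StandardAnalyzer(Version.LUCENE_48);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
        Version.LUCENE_48, FSDirectory.open(new File("/var/data/suggest-infix")), a);
    suggester.build(new InputArrayIterator(inputs));  // inputs is an Input[]
    List<Lookup.LookupResult> results =
        suggester.lookup("lucen", 5, true, true);     // allTermsRequired, doHighlight
    suggester.close();  // since this change, also closes the Directory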
@@ -156,32 +154,29 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     this.queryAnalyzer = queryAnalyzer;
     this.indexAnalyzer = indexAnalyzer;
     this.matchVersion = matchVersion;
-    this.indexPath = indexPath;
+    this.dir = dir;
     this.minPrefixChars = minPrefixChars;
-    dir = getDirectory(indexPath);
 
     if (DirectoryReader.indexExists(dir)) {
       // Already built; open it:
       writer = new IndexWriter(dir,
-                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), SORT, IndexWriterConfig.OpenMode.APPEND));
+                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
       searcherMgr = new SearcherManager(writer, true, null);
     }
   }
 
   /** Override this to customize index settings, e.g. which
-   *  codec to use. The sort is null if this config is for
-   *  the first pass writer. */
-  protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sort sort, IndexWriterConfig.OpenMode openMode) {
+   *  codec to use. */
+  protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
     IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
     iwc.setCodec(new Lucene46Codec());
     iwc.setOpenMode(openMode);
 
-    if (sort != null) {
-      // This way all merged segments will be sorted at
-      // merge time, allow for per-segment early termination
-      // when those segments are searched:
-      iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sort));
-    }
+    // This way all merged segments will be sorted at
+    // merge time, allowing for per-segment early termination
+    // when those segments are searched:
+    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
+
     return iwc;
   }
 
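[Editor's note: with the Sort parameter gone, the SortingMergePolicy is installed
unconditionally, so a subclass overriding this method should extend the returned
config rather than rebuild it from scratch, or per-segment early termination at
lookup time silently stops working. A hedged sketch; the RAM-buffer tuning and
surrounding variables are illustrative only:]

    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
        Version.LUCENE_48, dir, indexAnalyzer, queryAnalyzer, 4) {
      @Override
      protected IndexWriterConfig getIndexWriterConfig(Version matchVersion,
          Analyzer gramAnalyzer, IndexWriterConfig.OpenMode openMode) {
        IndexWriterConfig iwc = super.getIndexWriterConfig(matchVersion, gramAnalyzer, openMode);
        iwc.setRAMBufferSizeMB(64.0);  // tune freely, but leave the merge policy alone
        return iwc;
      }
    };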
@@ -204,16 +199,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
       writer = null;
     }
 
-    Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
-
-    IndexWriter w = null;
     AtomicReader r = null;
     boolean success = false;
     try {
       // First pass: build a temporary normal Lucene index,
       // just indexing the suggestions as they iterate:
-      w = new IndexWriter(dirTmp,
-                          getIndexWriterConfig(matchVersion, getGramAnalyzer(), null, IndexWriterConfig.OpenMode.CREATE));
+      writer = new IndexWriter(dir,
+                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
       BytesRef text;
       Document doc = new Document();
       FieldType ft = getTextFieldType();
@@ -251,35 +243,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
         if (iter.hasPayloads()) {
           payloadField.setBytesValue(iter.payload());
         }
-        w.addDocument(doc);
+        writer.addDocument(doc);
       }
       //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
 
-      // Second pass: sort the entire index:
-      r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));
-      //long t1 = System.nanoTime();
-
-      // We can rollback the first pass, now that have have
-      // the reader open, because we will discard it anyway
-      // (no sense in fsync'ing it):
-      w.rollback();
-
-      r = SortingAtomicReader.wrap(r, SORT);
-      
-      writer = new IndexWriter(dir,
-                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), SORT, IndexWriterConfig.OpenMode.CREATE));
-      writer.addIndexes(new IndexReader[] {r});
-      r.close();
-
-      //System.out.println("sort time: " + ((System.nanoTime()-t1)/1000000) + " msec");
-
       searcherMgr = new SearcherManager(writer, true, null);
       success = true;
     } finally {
       if (success) {
-        IOUtils.close(w, r, dirTmp);
+        IOUtils.close(r);
       } else {
-        IOUtils.closeWhileHandlingException(w, writer, r, dirTmp);
+        IOUtils.closeWhileHandlingException(writer, r);
         writer = null;
       }
     }
@@ -638,11 +612,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     }
     if (writer != null) {
       writer.close();
-      writer = null;
-    }
-    if (dir != null) {
       dir.close();
-      dir = null;
+      writer = null;
     }
   }
 
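[Editor's note: close() now owns the caller-provided Directory, so the Directory must
not be closed or reused after the suggester is closed; a later access would hit
AlreadyClosedException. A sketch of the resulting lifecycle, with a hypothetical path:]

    Directory dir = FSDirectory.open(new File("/var/data/suggest-infix"));
    AnalyzingInfixSuggester suggester =
        new AnalyzingInfixSuggester(Version.LUCENE_48, dir, analyzer);
    try {
      suggester.build(new InputArrayIterator(inputs));
      // ... lookups ...
    } finally {
      suggester.close();  // closes both the IndexWriter and dir
    }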
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
index 02281069a93..46df98c648e 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
@@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest.analyzing;
  * limitations under the License.
  */
 
-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
@@ -38,6 +37,7 @@ import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Version;
 
@@ -92,8 +92,8 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
    * Create a new instance, loading from a previously built
    * directory, if it exists.
    */
-  public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
-    super(matchVersion, indexPath, analyzer);
+  public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
+    super(matchVersion, dir, analyzer);
     this.blenderType = BlenderType.POSITION_LINEAR;
     this.numFactor = DEFAULT_NUM_FACTOR;
   }
@@ -106,9 +106,9 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
   * @param numFactor   Factor to multiply the number of searched elements before blending
    * @throws IOException If there are problems opening the underlying Lucene index.
    */
-  public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
+  public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                                int minPrefixChars, BlenderType blenderType, int numFactor) throws IOException {
-    super(matchVersion, indexPath, indexAnalyzer, queryAnalyzer, minPrefixChars);
+    super(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars);
     this.blenderType = blenderType;
     this.numFactor = numFactor;
   }
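[Editor's note: the blended variant follows the same Directory-based pattern; the
path, input, and blending parameters below are illustrative.]

    BlendedInfixSuggester suggester = new BlendedInfixSuggester(
        Version.LUCENE_48, FSDirectory.open(new File("/var/data/suggest-blended")),
        indexAnalyzer, queryAnalyzer,
        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
        BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
    suggester.build(new InputArrayIterator(new Input[] {
        new Input("star wars: episode v - the empire strikes back", 12, new BytesRef("1"))
    }));
    List<Lookup.LookupResult> results = suggester.lookup("empire", 10, true, true);
    suggester.close();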
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
index 16ee899ac09..b2471ef6c5c 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
@@ -40,6 +40,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
 import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
 import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.*;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
@@ -161,7 +162,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
     } catch (InstantiationException e) {
       Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
       if (cls == AnalyzingInfixSuggester.class) {
-        lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, TestUtil.getTempDir("LookupBenchmarkTest"), a);
+        lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, FSDirectory.open(TestUtil.getTempDir("LookupBenchmarkTest")), a);
       } else {
         Constructor ctor = cls.getConstructor(Analyzer.class);
         lookup = ctor.newInstance(a);
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
index e85713864a4..147ee3b1b61 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
@@ -21,7 +21,6 @@ import java.io.File;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
@@ -39,7 +38,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
@@ -55,15 +53,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
 
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
@@ -106,22 +97,12 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
     File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
 
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
     assertEquals(2, suggester.getCount());
     suggester.close();
 
-    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
     assertEquals(2, results.size());
     assertEquals("a penny saved is a penny earned", results.get(0).key);
@@ -159,15 +140,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
         @Override
        protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
           try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
@@ -239,17 +213,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("lend me your ear", 8, new BytesRef("foobar")),
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
-
     File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
 
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
     int minPrefixLength = random().nextInt(10);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
     suggester.build(new InputArrayIterator(keys));
 
     for(int i=0;i<2;i++) {
@@ -306,12 +274,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
 
       // Make sure things still work after close and reopen:
       suggester.close();
-      suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
-          @Override
-          protected Directory getDirectory(File path) {
-            return newFSDirectory(path);
-          }
-        };
+      suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
     }
     suggester.close();
   }
@@ -321,15 +284,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
     assertEquals(1, results.size());
@@ -342,15 +298,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
     assertEquals(1, results.size());
@@ -359,18 +308,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
 
     // Try again, but overriding addPrefixMatch to highlight
     // the entire hit:
-    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
+    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
         @Override
         protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
           sb.append("");
           sb.append(surface);
           sb.append("");
         }
-
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
       };
     suggester.build(new InputArrayIterator(keys));
     results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
@@ -384,15 +328,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
     suggester.close();
     suggester.close();
@@ -418,14 +355,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
         }
       };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, indexAnalyzer, queryAnalyzer, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), indexAnalyzer, queryAnalyzer, 3);
 
     Input keys[] = new Input[] {
       new Input("a bob for apples", 10, new BytesRef("foobaz")),
@@ -439,14 +369,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
   }
 
   public void testEmptyAtStart() throws Exception {
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(new Input[0]));
     suggester.add(new BytesRef("a penny saved is a penny earned"), 10, new BytesRef("foobaz"));
     suggester.add(new BytesRef("lend me your ear"), 8, new BytesRef("foobar"));
@@ -483,14 +407,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
   }
 
   public void testBothExactAndPrefix() throws Exception {
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(new Input[0]));
     suggester.add(new BytesRef("the pen is pretty"), 10, new BytesRef("foobaz"));
     suggester.refresh();
@@ -563,12 +481,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       System.out.println("  minPrefixChars=" + minPrefixChars);
     }
 
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
 
     // Initial suggester built with nothing:
     suggester.build(new InputArrayIterator(new Input[0]));
@@ -648,12 +561,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
         }
         lookupThread.finish();
         suggester.close();
-        suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
-            @Override
-            protected Directory getDirectory(File path) {
-              return newFSDirectory(path);
-            }
-          };
+        suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
         lookupThread = new LookupThread(suggester);
         lookupThread.start();
 
@@ -824,15 +732,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("lend me your ear", 8, new BytesRef("foobar")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
 
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
index 71ac3df23d0..89c9629a675 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
@@ -49,15 +48,10 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     File tempDir = TestUtil.getTempDir("BlendedInfixSuggesterTest");
 
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
-        BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
-        BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
+                                                                BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
+                                                                BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
     suggester.build(new InputArrayIterator(keys));
 
     // we query for star wars and check that the weight
@@ -94,12 +88,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // BlenderType.LINEAR is used by default (remove position*10%)
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a);
     suggester.build(new InputArrayIterator(keys));
 
     assertEquals(w, getInResults(suggester, "top", pl, 1));
@@ -109,13 +98,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     suggester.close();
 
     // BlenderType.RECIPROCAL is using 1/(1+p) * w where w is weight and p the position of the word
-    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
     suggester.build(new InputArrayIterator(keys));
 
     assertEquals(w, getInResults(suggester, "top", pl, 1));
@@ -145,13 +129,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // if factor is small, we don't get the expected element
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
 
     suggester.build(new InputArrayIterator(keys));
 
@@ -169,13 +148,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     suggester.close();
 
     // if we increase the factor we have it
-    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
     suggester.build(new InputArrayIterator(keys));
 
     // we have it
@@ -205,14 +179,9 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // if factor is small, we don't get the expected element
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
-        BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
+                                                                BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
     suggester.build(new InputArrayIterator(keys));
 
 

From cee0e37635f042286f555e660caa417f8186e48b Mon Sep 17 00:00:00 2001
From: Michael McCandless 
Date: Thu, 6 Mar 2014 17:20:45 +0000
Subject: [PATCH 29/38] LUCENE-5493: fix solr

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574969 13f79535-47bb-0310-9956-ffa450edef68
---
 .../spelling/suggest/fst/AnalyzingInfixLookupFactory.java  | 4 +++-
 .../spelling/suggest/fst/BlendedInfixLookupFactory.java    | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
index f09c089d743..a11d6d22361 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
 import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
@@ -90,7 +91,8 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
 
     try {
       return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion, 
-          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
+                                         FSDirectory.open(new File(indexPath)), indexAnalyzer,
+                                         queryAnalyzer, minPrefixChars);
     } catch (IOException e) {
      throw new RuntimeException(e);
     }
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
index 1662913c694..7c20b5645a5 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
@@ -23,8 +23,9 @@ import java.io.IOException;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
-import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
 import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType;
+import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
@@ -94,7 +95,9 @@ public class BlendedInfixLookupFactory extends AnalyzingInfixLookupFactory {
     
     try {
       return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion, 
-          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor);
+                                       FSDirectory.open(new File(indexPath)),
+                                       indexAnalyzer, queryAnalyzer, minPrefixChars,
+                                       blenderType, numFactor);
     } catch (IOException e) {
      throw new RuntimeException(e);
     }

From 740034cdc182974779dab1201a3f4abf1700f339 Mon Sep 17 00:00:00 2001
From: Robert Muir 
Date: Thu, 6 Mar 2014 17:27:19 +0000
Subject: [PATCH 30/38] LUCENE-5493: add CHANGES

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574972 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5f4511a6d8c..7e2fc66352e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -99,6 +99,10 @@ New Features
 * LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter.
   (Robert Muir)
 
+* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector
+  support arbitrary Sort specifications.  
+  (Robert Muir, Mike McCandless, Adrien Grand)
+
 API Changes
 
 * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
@@ -106,6 +110,12 @@ API Changes
 
 * LUCENE-5468: Move offline Sort (from suggest module) to OfflineSort. (Robert Muir)
 
+* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector take
+  Sort instead of Sorter. BlockJoinSorter is removed, replaced with 
+  BlockJoinComparatorSource, which can take a Sort for ordering of parents
+  and a separate Sort for ordering of children within a block. 
+  (Robert Muir, Mike McCandless, Adrien Grand)
+
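[Editor's note: a hedged sketch of the Sort-based API these entries describe; the
"timestamp" field, analyzer, searcher, and query are assumed context.]

    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));

    // Index time: segments produced by merges come out sorted.
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sort));

    // Query time: stop collecting early inside segments sorted by the same Sort.
    TopFieldCollector topDocs = TopFieldCollector.create(sort, 10, true, false, false, false);
    searcher.search(query, new EarlyTerminatingSortingCollector(topDocs, sort, 10));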
 Optimizations
 
 * LUCENE-5468: HunspellStemFilter uses 10 to 100x less RAM. It also loads

From 6890323868b0e615f46d913ae988940cb0163096 Mon Sep 17 00:00:00 2001
From: Robert Muir 
Date: Thu, 6 Mar 2014 19:04:40 +0000
Subject: [PATCH 31/38] LUCENE-5493: javadocs cleanups

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1575008 13f79535-47bb-0310-9956-ffa450edef68
---
 .../sorter/BlockJoinComparatorSource.java     | 13 +++++----
 .../EarlyTerminatingSortingCollector.java     | 28 ++++++++++---------
 .../index/sorter/SortingMergePolicy.java      | 19 +++++++------
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
index 3029bcab656..6d5ff0bdd89 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
@@ -24,6 +24,9 @@ import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.FieldComparatorSource;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher; // javadocs
+import org.apache.lucene.search.Query; // javadocs
+import org.apache.lucene.search.ScoreDoc; // javadocs
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
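[Editor's note: a hedged sketch of block-join sorting with this class, assuming the
two-Sort constructor described in the CHANGES entry above. The "parent" marker field
and sort fields are hypothetical, and the parents filter must produce fixed bit sets
(e.g. the join module's FixedBitSetCachingWrapperFilter).]

    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    Sort parentSort = new Sort(new SortField("title", SortField.Type.STRING));
    Sort childSort = new Sort(new SortField("price", SortField.Type.LONG));
    SortField blockSortField = new SortField("blockjoin",
        new BlockJoinComparatorSource(parentsFilter, parentSort, childSort));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), new Sort(blockSortField)));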
@@ -32,12 +35,12 @@ import org.apache.lucene.util.FixedBitSet;
 /**
  * Helper class to sort readers that contain blocks of documents.
  * 

- * Note that this currently has some limitations:
+ * Note that this class is intended to be used with {@link SortingMergePolicy},
+ * and for other purposes has some limitations:
  * <ul>
- *   <li>Cannot yet be used with IndexSearcher.searchAfter
- *   <li>Filling sort value fields is not yet supported.
+ *   <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
+ *   <li>Filling sort field values is not yet supported.
  * </ul>
- * Its intended to be used with {@link SortingMergePolicy}. */ // TODO: can/should we clean this thing up (e.g. return a proper sort value) // and move to the join/ module? @@ -160,7 +163,7 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { @Override public Integer value(int slot) { // really our sort "value" is more complex... - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("filling sort field values is not yet supported"); } @Override diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java index fa032edc462..23772e18f23 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java @@ -34,41 +34,43 @@ import org.apache.lucene.search.TotalHitCountCollector; * {@link Sort}. * *
- * NOTE: the {@link Collector} detects sorted segments according to
+ * NOTE: the {@code Collector} detects sorted segments according to
  * {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
- * it collects up to a specified num docs from each segment, and therefore is
- * mostly suitable for use in conjunction with collectors such as
+ * it collects up to a specified {@code numDocsToCollect} from each segment,
+ * and therefore is mostly suitable for use in conjunction with collectors such as
  * {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
  * <p>
- * NOTE: If you wrap a {@link TopDocsCollector} that sorts in the same
- * order as the index order, the returned {@link TopDocsCollector#topDocs()}
+ * NOTE: If you wrap a {@code TopDocsCollector} that sorts in the same
+ * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
  * will be correct. However the total of {@link TopDocsCollector#getTotalHits()
  * hit count} will be underestimated since not all matching documents will have
  * been collected.
  * <p>
- * NOTE: This {@link Collector} uses {@link Sort#toString()} to detect
- * whether a segment was sorted with the same {@link Sort} as the one given in
- * {@link #EarlyTerminatingSortingCollector(Collector, Sort, int)}. This has
+ * NOTE: This {@code Collector} uses {@link Sort#toString()} to detect
+ * whether a segment was sorted with the same {@code Sort}. This has
  * two implications:
  * <ul>
  * <li>if a custom comparator is not implemented correctly and returns
  * different identifiers for equivalent instances, this collector will not
  * detect sorted segments,</li>
  * <li>if you suddenly change the {@link IndexWriter}'s
- * {@link SortingMergePolicy} to sort according to another criterion and if both
- * the old and the new {@link Sort}s have the same identifier, this
- * {@link Collector} will incorrectly detect sorted segments.</li>
+ * {@code SortingMergePolicy} to sort according to another criterion and if both
+ * the old and the new {@code Sort}s have the same identifier, this
+ * {@code Collector} will incorrectly detect sorted segments.</li>
  * </ul>
* * @lucene.experimental */ public class EarlyTerminatingSortingCollector extends Collector { - + /** The wrapped Collector */ protected final Collector in; + /** Sort used to sort the search results */ protected final Sort sort; + /** Number of documents to collect in each segment */ protected final int numDocsToCollect; - + /** Number of documents to collect in the current segment being processed */ protected int segmentTotalCollect; + /** True if the current segment being processed is sorted by {@link #sort} */ protected boolean segmentSorted; private int numCollected; diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java index 58263407e5d..8b11b689fd9 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import org.apache.lucene.analysis.Analyzer; // javadocs import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -42,14 +43,14 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; * before merging them. As a consequence, all segments resulting from a merge * will be sorted while segments resulting from a flush will be in the order * in which documents have been added. - *
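[editorial aside, not part of the committed patch] Per the javadoc above, the collector is handed the same Sort the index was written with, wrapping a TopDocsCollector. A minimal hedged sketch, assuming an IndexSearcher named searcher, a Query named query, and a "timestamp" field (all hypothetical):

    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG)); // must match the index-time sort
    TopFieldCollector topDocs = TopFieldCollector.create(sort, 10, true, false, false, false);
    searcher.search(query, new EarlyTerminatingSortingCollector(topDocs, sort, 10));
    // topDocs.topDocs() is correct; topDocs.getTotalHits() may be underestimated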

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
index 58263407e5d..8b11b689fd9 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.analysis.Analyzer; // javadocs
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -42,14 +43,14 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
  * before merging them. As a consequence, all segments resulting from a merge
  * will be sorted while segments resulting from a flush will be in the order
  * in which documents have been added.
- * <p><b>NOTE</b>: Never use this {@link MergePolicy} if you rely on
- * {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
+ * <p><b>NOTE</b>: Never use this policy if you rely on
+ * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
  * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
- * <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
- * {@link Sort}s so that the order of segments is predictable. For example,
- * using {@link SortingMergePolicy} with {@link Sort#INDEXORDER in reverse} (which is
- * not idempotent) will make the order of documents in a segment depend on the
- * number of times the segment has been merged.
+ * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
+ * so that the order of segments is predictable. For example, using
+ * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
+ * the order of documents in a segment depend on the number of times the segment
+ * has been merged.
  * @lucene.experimental
  */
 public final class SortingMergePolicy extends MergePolicy {
@@ -148,7 +149,7 @@ public final class SortingMergePolicy extends MergePolicy {
 
   }
 
-  /** Returns true if the given reader is sorted by the given sort. */
+  /** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
   public static boolean isSorted(AtomicReader reader, Sort sort) {
     if (reader instanceof SegmentReader) {
       final SegmentReader segReader = (SegmentReader) reader;
@@ -175,7 +176,7 @@ public final class SortingMergePolicy extends MergePolicy {
   final Sorter sorter;
   final Sort sort;
 
-  /** Create a new {@link MergePolicy} that sorts documents with sort. */
+  /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
   public SortingMergePolicy(MergePolicy in, Sort sort) {
     this.in = in;
     this.sorter = new Sorter(sort);
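[editorial aside, not part of the committed patch] The isSorted helper above can be used to check, segment by segment, which parts of an index have already been merged into the target order. A rough hedged sketch, assuming a DirectoryReader named reader and the index-time Sort named sort:

    for (AtomicReaderContext ctx : reader.leaves()) {
      if (!SortingMergePolicy.isSorted(ctx.reader(), sort)) {
        // a flushed (not yet merged) segment: documents are still in insertion order
      }
    }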

From c16165e760969d5c4571551641128e191f3f7357 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Thu, 6 Mar 2014 19:15:25 +0000
Subject: [PATCH 32/38] LUCENE-5493: add missing experimental tag

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1575017 13f79535-47bb-0310-9956-ffa450edef68
---
 .../apache/lucene/index/sorter/BlockJoinComparatorSource.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
index 6d5ff0bdd89..af91463b297 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.FixedBitSet;
  * <ul>
  *   <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
  *   <li>Filling sort field values is not yet supported.
  * </ul>
+ * @lucene.experimental
  */
 // TODO: can/should we clean this thing up (e.g. return a proper sort value)
 // and move to the join/ module?

From 7f695434f44ff2718b7f85a1fd88ae848d766a4c Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Thu, 6 Mar 2014 19:53:10 +0000
Subject: [PATCH 33/38] disable slow solr tests in smoketester

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575024 13f79535-47bb-0310-9956-ffa450edef68
---
 dev-tools/scripts/smokeTestRelease.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py
index b1a5953232c..c56f696b3c5 100644
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@@ -731,7 +731,7 @@ def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs
     os.chdir('solr')
     print("  run tests w/ Java 7 and testArgs='%s'..." % testArgs)
-    run('%s; ant clean test %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
+    run('%s; ant clean test -Dtests.slow=false %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
 
     # test javadocs
     print('  generate javadocs w/ Java 7...')

From 1e02da264569cb09ceefdc1bdececc2c61e75673 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Fri, 7 Mar 2014 02:46:49 +0000
Subject: [PATCH 34/38] add thunderbird version of TestAllDictionaries

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575126 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hunspell/TestAllDictionaries2.java        | 219 ++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java

diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
new file mode 100644
index 00000000000..d0a83561802
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
@@ -0,0 +1,219 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import org.apache.lucene.analysis.hunspell.Dictionary;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.junit.Ignore;
+
+/**
+ * These thunderbird dictionaries can be retrieved via:
+ * https://addons.mozilla.org/en-US/thunderbird/language-tools/
+ * You must click and download every file: sorry!
+ */
+@Ignore("enable manually")
+public class TestAllDictionaries2 extends LuceneTestCase {
+
+  // set this to the location of where you downloaded all the files
+  static final File DICTIONARY_HOME =
+      new File("/data/thunderbirdDicts");
+
+  final String tests[] = {
+    /* zip file */ /* dictionary */ /* affix */
+    "addon-0.4.5-an+fx+tb+fn+sm.xpi", "dictionaries/ru.dic", "dictionaries/ru.aff",
+    "addon-0.5.5-fx+tb.xpi", "dictionaries/ko-KR.dic", "dictionaries/ko-KR.aff",
+    "afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
+    "albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
+    "amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
+//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
+//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
+    "azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
+    "belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
+    "belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
+    "bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi", "dictionaries/bn-BD.dic", "dictionaries/bn-BD.aff",
+    "brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi", "dictionaries/pt-BR-antigo.dic", "dictionaries/pt-BR-antigo.aff",
+    "brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi", "dictionaries/pt-BR.dic", "dictionaries/pt-BR.aff",
+    "british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi", "dictionaries/en-GB.dic", "dictionaries/en-GB.aff",
+    "bulgarian_dictionary-4.3-fx+tb+sm.xpi", "dictionaries/bg.dic", "dictionaries/bg.aff",
+    "canadian_english_dictionary-2.0.8-fx+sm+tb.xpi", "dictionaries/en-CA.dic", "dictionaries/en-CA.aff",
+    "ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
+    "chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
+    "corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
+    "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
+    "corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
+    "croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
+    "croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
+    "dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi", "dictionaries/da.dic", "dictionaries/da.aff",
+    "deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi", "dictionaries/de-DE-1901.dic", "dictionaries/de-DE-1901.aff",
+    "diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi", "dictionaries/es-ES.dic", "dictionaries/es-ES.aff",
+    "diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi", "dictionaries/es_VE.dic", "dictionaries/es_VE.aff",
+    "diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
+    "diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
+    "diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
+//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
+    "difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
+//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
+    "dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
+    "dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
+    "eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
+    "english_australian_dictionary-2.1.2-tb+fx+sm.xpi", "dictionaries/en-AU.dic", "dictionaries/en-AU.aff",
+    "esperanta_vortaro-1.0.2-fx+tb+sm.xpi", "dictionaries/eo-EO.dic", "dictionaries/eo-EO.aff",
+    "european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi", "dictionaries/pt-PT.dic", "dictionaries/pt-PT.aff",
+    "faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi", "dictionaries/fo_FO.dic", "dictionaries/fo_FO.aff",
+    "frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi", "dictionaries/fy.dic", "dictionaries/fy.aff",
+    "geiriadur_cymraeg-1.08-tb+sm+fx.xpi", "dictionaries/cy_GB.dic", "dictionaries/cy_GB.aff",
+    "general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi", "dictionaries/ca.dic", "dictionaries/ca.aff",
+    "german_dictionary-2.0.3-fn+fx+sm+tb.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
+    "german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
+    "german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
+    "german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
+    "german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
+    "german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
+    "greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi", "dictionaries/el-GR.dic", "dictionaries/el-GR.aff",
+    "gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/gu_IN.dic", "dictionaries/gu_IN.aff",
+    "haitian_creole_spell_checker-0.08-tb+sm+fx.xpi", "dictionaries/ht-HT.dic", "dictionaries/ht-HT.aff",
+    "hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
+    "hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
+    "hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
+//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
+//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
+    "kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
+//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
+    "kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
+    "kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
+    "kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
+    "lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi", "dictionaries/lo_LA.dic", "dictionaries/lo_LA.aff",
+    "latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi", "dictionaries/lv_LV.dic", "dictionaries/lv_LV.aff",
+    "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
+    "litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
+    "litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
+//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
+//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
+    "malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
+    "marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
+    "ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
+    "nepali_dictionary-1.2-fx+tb.xpi", "dictionaries/ne_NP.dic", "dictionaries/ne_NP.aff",
+    "norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi", "dictionaries/nb.dic", "dictionaries/nb.aff",
+    "norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi", "dictionaries/nn.dic", "dictionaries/nn.aff",
+    "northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nso-ZA.dic", "dictionaries/nso-ZA.aff",
+    "oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
+    "polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
+    "punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
+//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
+//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
+    "sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
+    "scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
+    "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
+    "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Latn.dic", "dictionaries/sr-RS-Latn.aff",
+    "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK.dic", "dictionaries/sk-SK.aff",
+    "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK-ascii.dic", "dictionaries/sk-SK-ascii.aff",
+    "slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff",
+    "songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff",
+    "southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff",
+    "sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
+    "sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff",
+    "srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/Srpski_latinica.dic", "dictionaries/Srpski_latinica.aff",
+    "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff",
+    "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv_FI.dic", "dictionaries/sv_FI.aff",
+    "swati_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ss-ZA.dic", "dictionaries/ss-ZA.aff",
+    "tamil_spell_checker_for_firefox-0.4-tb+fx.xpi", "dictionaries/ta-TA.dic", "dictionaries/ta-TA.aff",
+    "telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
+    "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
+    "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
+//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
+    "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
+    "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
+    "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
+//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
+    "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
+    "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
+    "upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
+//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
+    "uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
+    "valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
+    "venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
+    "verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
+    "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
+    "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
+//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
+    "xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
+    "xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
+    "yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
+    "zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff"
+  };
+
+  public void test() throws Exception {
+    for (int i = 0; i < tests.length; i += 3) {
+      File f = new File(DICTIONARY_HOME, tests[i]);
+      assert f.exists();
+
+      try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+        ZipEntry dicEntry = zip.getEntry(tests[i+1]);
+        assert dicEntry != null;
+        ZipEntry affEntry = zip.getEntry(tests[i+2]);
+        assert affEntry != null;
+
+        try (InputStream dictionary = zip.getInputStream(dicEntry);
+             InputStream affix = zip.getInputStream(affEntry)) {
+          Dictionary dic = new Dictionary(affix, dictionary);
+          System.out.println(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" +
+                             "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " +
+                             "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " +
+                             "strips=" + RamUsageEstimator.humanSizeOf(dic.stripLookup) + ", " +
+                             "conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " +
+                             "affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " +
+                             "prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " +
+                             "suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
+        }
+      }
+    }
+  }
+
+  public void testOneDictionary() throws Exception {
+    String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
+    for (int i = 0; i < tests.length; i++) {
+      if (tests[i].equals(toTest)) {
+        File f = new File(DICTIONARY_HOME, tests[i]);
+        assert f.exists();
+
+        try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+          ZipEntry dicEntry = zip.getEntry(tests[i+1]);
+          assert dicEntry != null;
+          ZipEntry affEntry = zip.getEntry(tests[i+2]);
+          assert affEntry != null;
+
+          try (InputStream dictionary = zip.getInputStream(dicEntry);
+               InputStream affix = zip.getInputStream(affEntry)) {
+            new Dictionary(affix, dictionary);
+          }
+        }
+      }
+    }
+  }
+}
From b87af547745b150ff6ca0c8af984cd0bb06b3704 Mon Sep 17 00:00:00 2001
From: Joel Bernstein
Date: Fri, 7 Mar 2014 14:20:48 +0000
Subject: [PATCH 35/38] SOLR-5720: Updated CHANGES.txt

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575266 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 4d10404fc78..f7409a97f52 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -100,6 +100,10 @@ New Features
 * SOLR-5714: You can now use one pool of memory for for the HDFS block cache
   that all collections share. (Mark Miller, Gregory Chanan)
 
+* SOLR-5720: Add ExpandComponent to expand results collapsed by the
+  CollapsingQParserPlugin. (Joel Bernstein)
+
+
 Bug Fixes
 ----------------------

From 55edc565d8f192f8a349611f8ca827610b313148 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Fri, 7 Mar 2014 16:12:00 +0000
Subject: [PATCH 36/38] LUCENE-5500: SortingMergePolicy should error if the Sort refers to the score

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575306 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/core/src/java/org/apache/lucene/search/Sort.java  | 4 ++--
 .../src/java/org/apache/lucene/index/sorter/Sorter.java  | 3 +++
 .../lucene/index/sorter/SortingAtomicReaderTest.java     | 9 +++++++++
 .../lucene/index/sorter/TestSortingMergePolicy.java      | 9 +++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/Sort.java b/lucene/core/src/java/org/apache/lucene/search/Sort.java
index 0650d37fe33..57630635d20 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Sort.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Sort.java
@@ -202,8 +202,8 @@ public class Sort {
     return 0x45aaf665 + Arrays.hashCode(fields);
   }
 
-  /** Whether the relevance score is needed to sort documents. */
-  boolean needsScores() {
+  /** Returns true if the relevance score is needed to sort documents. */
+  public boolean needsScores() {
     for (SortField sortField : fields) {
       if (sortField.needsScores()) {
         return true;

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
index d32785f8876..608b072237a 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
@@ -39,6 +39,9 @@ final class Sorter {
 
   /** Creates a new Sorter to sort the index with {@code sort} */
   Sorter(Sort sort) {
+    if (sort.needsScores()) {
+      throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
+    }
     this.sort = sort;
   }

diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
index 89d6403619b..bb75fbcb62d 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
+++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
@@ -62,5 +62,14 @@ public class SortingAtomicReaderTest extends SorterTestBase {
 
     TestUtil.checkReader(reader);
   }
+
+  public void testBadSort() throws Exception {
+    try {
+      SortingAtomicReader.wrap(reader, Sort.RELEVANCE);
+      fail("Didn't get expected exception");
+    } catch (IllegalArgumentException e) {
+      assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
+    }
+  }
 
 }

diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
index 47fb654d3ef..5095aeca299 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
+++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
@@ -172,5 +172,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
 
     assertReaderEquals("", sortedReader1, sortedReader2);
  }
+
+  public void testBadSort() throws Exception {
+    try {
+      new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
+      fail("Didn't get expected exception");
+    } catch (IllegalArgumentException e) {
+      assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
+    }
+  }
 
 }
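[editorial aside, not part of the committed patch] With needsScores() now public, callers can check a Sort themselves before handing it to the index-sorting components. A small hedged sketch:

    Sort byScore = new Sort(SortField.FIELD_SCORE);
    if (byScore.needsScores()) {
      // Sorter/SortingMergePolicy will throw IllegalArgumentException for this sort:
      // an index cannot be permanently sorted by a query-dependent relevance score
    }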
From 36edbb84ea0431bd27debd3ed6460c4b33f3ec23 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Fri, 7 Mar 2014 17:09:27 +0000
Subject: [PATCH 37/38] unescape %20 in urls so we don't get false failures with 1.7.0_60

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575328 13f79535-47bb-0310-9956-ffa450edef68
---
 dev-tools/scripts/checkJavaDocs.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dev-tools/scripts/checkJavaDocs.py b/dev-tools/scripts/checkJavaDocs.py
index e68f6072493..4089a8f15b5 100644
--- a/dev-tools/scripts/checkJavaDocs.py
+++ b/dev-tools/scripts/checkJavaDocs.py
@@ -212,7 +212,7 @@ def checkClassSummaries(fullPath):
     if inThing:
       if lineLower.find('</tr>') != -1:
         if not hasDesc:
-          missing.append((lastCaption, lastItem))
+          missing.append((lastCaption, unEscapeURL(lastItem)))
         inThing = False
         continue
       else:
@@ -298,6 +298,11 @@ def checkSummary(fullPath):
   f.close()
   return anyMissing
 
+def unEscapeURL(s):
+  # Not exhaustive!!
+  s = s.replace('%20', ' ')
+  return s
+
 def unescapeHTML(s):
   s = s.replace('&lt;', '<')
   s = s.replace('&gt;', '>')

From 26c79531b0eabeb0b7a1ab0dfa39a2318f8ee631 Mon Sep 17 00:00:00 2001
From: Ryan Ernst
Date: Fri, 7 Mar 2014 18:01:52 +0000
Subject: [PATCH 38/38] SOLR-5818: distrib search with custom comparator does not quite work correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575344 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt                              |  3 +
 .../handler/component/QueryComponent.java     | 79 ++++++++++++++++++-
 .../conf/schema-field-sort-values.xml         | 41 ++++++++++
 .../apache/solr/schema/WrappedIntField.java   | 46 +++++++++++
 .../solr/search/TestFieldSortValues.java      | 53 +++++++++++++
 5 files changed, 218 insertions(+), 4 deletions(-)
 create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml
 create mode 100644 solr/core/src/test/org/apache/solr/schema/WrappedIntField.java
 create mode 100644 solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f7409a97f52..decef0fa9f0 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -135,6 +135,9 @@ Bug Fixes
 * SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
   advertised leader in it's local state. (Timothy Potter, Mark Miller)
 
+* SOLR-5818: distrib search with custom comparator does not quite work correctly
+  (Ryan Ernst)
+
 Optimizations
 ----------------------
 * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index 43f3841434d..e2c5ba8e5cb 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -25,12 +25,14 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.grouping.GroupDocs;
 import org.apache.lucene.search.grouping.SearchGroup;
 import org.apache.lucene.search.grouping.TopGroups;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
@@ -500,12 +502,32 @@ public class QueryComponent extends SearchComponent
 
     // sort ids from lowest to highest so we can access them in order
     int nDocs = docList.size();
-    long[] sortedIds = new long[nDocs];
-    DocIterator it = rb.getResults().docList.iterator();
+    final long[] sortedIds = new long[nDocs];
+    final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
+    DocList docs = rb.getResults().docList;
+    DocIterator it = docs.iterator();
     for (int i=0; i<nDocs; i++) {
       sortedIds[i] = (((long) it.nextDoc()) << 32) | i;
+      scores[i] = docs.hasScores() ? it.score() : Float.NaN;
     }
-    Arrays.sort(sortedIds);
+
+    // sort ids and scores together
+    new InPlaceMergeSorter() {
+      @Override
+      protected void swap(int i, int j) {
+        long tmpId = sortedIds[i];
+        float tmpScore = scores[i];
+        sortedIds[i] = sortedIds[j];
+        scores[i] = scores[j];
+        sortedIds[j] = tmpId;
+        scores[j] = tmpScore;
+      }
+
+      @Override
+      protected int compare(int i, int j) {
+        return Long.compare(sortedIds[i], sortedIds[j]);
+      }
+    }.sort(0, sortedIds.length);
@@ -527,6 +549,8 @@ public class QueryComponent extends SearchComponent
 
-      for (long idAndPos : sortedIds) {
-        int doc = (int)(idAndPos >>> 32);
+      for (int i = 0; i < sortedIds.length; ++i) {
+        long idAndPos = sortedIds[i];
+        float score = scores[i];
+        int doc = (int)(idAndPos >>> 32);
         int position = (int)idAndPos;
@@ -546,6 +570,7 @@ public class QueryComponent extends SearchComponent
         }
 
         doc -= currentLeaf.docBase;  // adjust for what segment this is in
+        comparator.setScorer(new FakeScorer(doc, score));
        comparator.copy(0, doc);
         Object val = comparator.value(0);
         if (null != ft) val = ft.marshalSortValue(val);
@@ -1157,4 +1182,50 @@ public class QueryComponent extends SearchComponent
   public URL[] getDocs() {
     return null;
   }
+
+  /**
+   * Fake scorer for a single document
+   *
+   * TODO: when SOLR-5595 is fixed, this won't be needed, as we don't need to recompute sort values here from the comparator
+   */
+  private static class FakeScorer extends Scorer {
+    final int docid;
+    final float score;
+
+    FakeScorer(int docid, float score) {
+      super(null);
+      this.docid = docid;
+      this.score = score;
+    }
+
+    @Override
+    public int docID() {
+      return docid;
+    }
+
+    @Override
+    public float score() throws IOException {
+      return score;
+    }
+
+    @Override
+    public int freq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      return 1;
+    }
+  }
 }
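[editorial aside, not part of the committed patch] The FakeScorer idiom above is how a score that was captured during the main search gets replayed into a FieldComparator outside a live scoring pass. A condensed hedged sketch of the call pattern; leafContext, doc, and score are assumed to come from the surrounding loop:

    FieldComparator<?> comparator = sortField.getComparator(1, 0);
    comparator = comparator.setNextReader(leafContext);   // leafContext: the segment holding doc
    comparator.setScorer(new FakeScorer(doc, score));     // comparators that need scores read it here
    comparator.copy(0, doc);
    Object sortValue = comparator.value(0);               // no longer recomputed incorrectly for score-based sorts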
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml b/solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml
new file mode 100644
index 00000000000..22063d5542c
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- (the XML markup of this 41-line test schema was stripped from this copy; only the two trailing values below survive) -->
+  <defaultSearchField>text</defaultSearchField>
+  <uniqueKey>id</uniqueKey>

diff --git a/solr/core/src/test/org/apache/solr/schema/WrappedIntField.java b/solr/core/src/test/org/apache/solr/schema/WrappedIntField.java
new file mode 100644
index 00000000000..7f52b3e3f4e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/schema/WrappedIntField.java
@@ -0,0 +1,46 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.expressions.Expression;
+import org.apache.lucene.expressions.SimpleBindings;
+import org.apache.lucene.expressions.js.JavascriptCompiler;
+import org.apache.lucene.search.SortField;
+
+/**
+ * Custom field wrapping an int, to test sorting via a custom comparator.
+ */
+public class WrappedIntField extends TrieIntField {
+  Expression expr;
+
+  public WrappedIntField() {
+    try {
+      expr = JavascriptCompiler.compile("payload % 3");
+    } catch (Exception e) {
+      throw new RuntimeException("impossible?", e);
+    }
+  }
+
+  @Override
+  public SortField getSortField(final SchemaField field, final boolean reverse) {
+    field.checkSortability();
+    SimpleBindings bindings = new SimpleBindings();
+    bindings.add(super.getSortField(field, reverse));
+    return expr.getSortField(bindings, reverse);
+  }
+}

diff --git a/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java b/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java
new file mode 100644
index 00000000000..e234ff7dc3e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java
@@ -0,0 +1,53 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+
+
+/**
+ * Test QueryComponent.doFieldSortValues
+ */
+@SuppressCodecs({"Lucene3x"})
+public class TestFieldSortValues extends SolrTestCaseJ4 {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-minimal.xml", "schema-field-sort-values.xml");
+  }
+
+  public void testCustomComparator() throws Exception {
+    clearIndex();
+    assertU(adoc(sdoc("id", "1", "payload", "2")));
+    assertU(adoc(sdoc("id", "2", "payload", "3")));
+    assertU(adoc(sdoc("id", "3", "payload", "1")));
+    assertU(adoc(sdoc("id", "4", "payload", "5")));
+    assertU(adoc(sdoc("id", "5", "payload", "4")));
+    assertU(commit());
+
+    // payload is backed by a custom sort field which returns the payload value mod 3
+    assertQ(req("q", "*:*", "fl", "id", "sort", "payload asc, id asc", "fsv", "true")
+        , "//result/doc[int='2' and position()=1]"
+        , "//result/doc[int='3' and position()=2]"
+        , "//result/doc[int='5' and position()=3]"
+        , "//result/doc[int='1' and position()=4]"
+        , "//result/doc[int='4' and position()=5]");
+  }
+}
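[editorial closing note, not part of the committed patch] The new test schema above lost its markup in this copy. Judging from WrappedIntField and TestFieldSortValues, it presumably declares a field type backed by the custom class and an indexed "payload" field of that type, along these lines (a hedged sketch, not the committed file):

    <fieldType name="wrapped_int" class="org.apache.solr.schema.WrappedIntField"/>
    <field name="id" type="string" indexed="true" stored="true"/>
    <field name="payload" type="wrapped_int" indexed="true" stored="true"/>
    <defaultSearchField>text</defaultSearchField>
    <uniqueKey>id</uniqueKey>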