From f38c82b7a2b4570e8aef8c24d3405a455e80cf7a Mon Sep 17 00:00:00 2001
From: Mark Robert Miller
Date: Tue, 4 Mar 2014 03:18:47 +0000
Subject: [PATCH 01/38] SOLR-5714: You can now use one pool of memory for the
 HDFS block cache that all collections share.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1573847 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt                              |   3 +
 .../solr/core/HdfsDirectoryFactory.java       |  66 +++++--
 .../solr/store/blockcache/BlockCache.java     |   3 +
 .../solr/store/blockcache/BlockCacheKey.java  |  21 +-
 .../store/blockcache/BlockCacheLocation.java  |   3 +
 .../solr/store/blockcache/BlockDirectory.java |  25 ++-
 .../store/blockcache/BlockDirectoryCache.java |  20 +-
 .../solr/store/blockcache/BlockLocks.java     |   3 +
 .../solr/store/blockcache/BufferStore.java    |   4 +-
 .../apache/solr/store/blockcache/Cache.java   |   3 +
 .../store/blockcache/CachedIndexOutput.java   |   3 +-
 .../blockcache/CustomBufferedIndexInput.java  |   3 +
 .../apache/solr/store/blockcache/Metrics.java |   3 +
 .../blockcache/ReusedBufferedIndexOutput.java |   3 +
 .../apache/solr/store/blockcache/Store.java   |   3 +
 .../solr/store/hdfs/HdfsFileReader.java       |   3 +
 .../solr/store/hdfs/HdfsFileWriter.java       |   3 +
 .../solr/store/hdfs/NullIndexOutput.java      |   3 +
 .../solr/collection1/conf/solrconfig-tlog.xml |   1 +
 .../solr/collection1/conf/solrconfig.xml      |   3 +-
 .../cloud/ChaosMonkeyNothingIsSafeTest.java   |   4 +-
 .../solr/cloud/ChaosMonkeySafeLeaderTest.java |   2 +-
 .../org/apache/solr/cloud/RecoveryZkTest.java |   6 +-
 .../apache/solr/cloud/hdfs/HdfsTestUtil.java  |   3 +
 .../HdfsWriteToMultipleCollectionsTest.java   | 170 ++++++++++++++++
 .../solr/store/blockcache/BlockCacheTest.java |   2 +
 .../solr/collection1/conf/solrconfig.xml      |   3 +
 .../cloud/AbstractFullDistribZkTestBase.java  | 111 +----------
 .../solr/cloud/StopableIndexingThread.java    | 185 ++++++++++++++++++
 29 files changed, 522 insertions(+), 143 deletions(-)
 create mode 100644 solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java
 create mode 100644 solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f990f84d160..04430677f41 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -88,6 +88,9 @@ New Features
 * SOLR-5183: JSON updates now support nested child documents using a
   "_childDocument_" object key. (Varun Thacker, hossman)

+* SOLR-5714: You can now use one pool of memory for the HDFS block cache
+  that all collections share.
(Mark Miller, Gregory Chanan) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java index 466246ca664..af104c02475 100644 --- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java @@ -51,6 +51,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { public static final String BLOCKCACHE_SLAB_COUNT = "solr.hdfs.blockcache.slab.count"; public static final String BLOCKCACHE_DIRECT_MEMORY_ALLOCATION = "solr.hdfs.blockcache.direct.memory.allocation"; public static final String BLOCKCACHE_ENABLED = "solr.hdfs.blockcache.enabled"; + public static final String BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global"; public static final String BLOCKCACHE_READ_ENABLED = "solr.hdfs.blockcache.read.enabled"; public static final String BLOCKCACHE_WRITE_ENABLED = "solr.hdfs.blockcache.write.enabled"; @@ -72,6 +73,8 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { private String hdfsDataDir; private String confDir; + + private static BlockCache globalBlockCache; public static Metrics metrics; private static Boolean kerberosInit; @@ -102,6 +105,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { } boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true); + boolean blockCacheGlobal = params.getBool(BLOCKCACHE_GLOBAL, false); // default to false for back compat boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED, true); boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true); @@ -117,8 +121,6 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { boolean directAllocation = params.getBool( BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true); - BlockCache blockCache; - int slabSize = numberOfBlocksPerBank * blockSize; LOG.info( "Number of slabs of block cache [{}] with direct memory allocation set to [{}]", @@ -131,22 +133,13 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128); int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128); - BufferStore.initNewBuffer(bufferSize, bufferCount); - long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank - * (long) blockSize; - try { - blockCache = new BlockCache(metrics, directAllocation, totalMemory, - slabSize, blockSize); - } catch (OutOfMemoryError e) { - throw new RuntimeException( - "The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=g -XX:+UseLargePages to your containers startup args)" - + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap," - + " your java heap size might not be large enough." 
- + " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", e); - } - Cache cache = new BlockDirectoryCache(blockCache, metrics); + BlockCache blockCache = getBlockDirectoryCache(path, numberOfBlocksPerBank, + blockSize, bankCount, directAllocation, slabSize, + bufferSize, bufferCount, blockCacheGlobal); + + Cache cache = new BlockDirectoryCache(blockCache, path, metrics); HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf); - dir = new BlockDirectory("solrcore", hdfsDirectory, cache, null, + dir = new BlockDirectory(path, hdfsDirectory, cache, null, blockCacheReadEnabled, blockCacheWriteEnabled); } else { dir = new HdfsDirectory(new Path(path), conf); @@ -164,6 +157,45 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory { } return dir; } + + private BlockCache getBlockDirectoryCache(String path, + int numberOfBlocksPerBank, int blockSize, int bankCount, + boolean directAllocation, int slabSize, int bufferSize, int bufferCount, boolean staticBlockCache) { + if (!staticBlockCache) { + LOG.info("Creating new single instance HDFS BlockCache"); + return createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, directAllocation, slabSize, bufferSize, bufferCount); + } + LOG.info("Creating new global HDFS BlockCache"); + synchronized (HdfsDirectoryFactory.class) { + + if (globalBlockCache == null) { + globalBlockCache = createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, + directAllocation, slabSize, bufferSize, bufferCount); + } + } + return globalBlockCache; + } + + private BlockCache createBlockCache(int numberOfBlocksPerBank, int blockSize, + int bankCount, boolean directAllocation, int slabSize, int bufferSize, + int bufferCount) { + BufferStore.initNewBuffer(bufferSize, bufferCount); + long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank + * (long) blockSize; + + BlockCache blockCache; + try { + blockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize); + } catch (OutOfMemoryError e) { + throw new RuntimeException( + "The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=g -XX:+UseLargePages to your containers startup args)" + + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap," + + " your java heap size might not be large enough." 
+ + " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", + e); + } + return blockCache; + } @Override public boolean exists(String path) { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java index a6cdf64923a..a520c6b6c29 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCache.java @@ -24,6 +24,9 @@ import java.util.concurrent.atomic.AtomicInteger; import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap; import com.googlecode.concurrentlinkedhashmap.EvictionListener; +/** + * @lucene.experimental + */ public class BlockCache { public static final int _128M = 134217728; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java index d0daefe0658..cf05c6936bc 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheKey.java @@ -16,12 +16,23 @@ package org.apache.solr.store.blockcache; * See the License for the specific language governing permissions and * limitations under the License. */ - +/** + * @lucene.experimental + */ public class BlockCacheKey implements Cloneable { private long block; private int file; + private String path; + public String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } + public long getBlock() { return block; } @@ -44,9 +55,10 @@ public class BlockCacheKey implements Cloneable { int result = 1; result = prime * result + (int) (block ^ (block >>> 32)); result = prime * result + file; + result = prime * result + ((path == null) ? 
0 : path.hashCode()); return result; } - + @Override public boolean equals(Object obj) { if (this == obj) return true; @@ -55,9 +67,12 @@ public class BlockCacheKey implements Cloneable { BlockCacheKey other = (BlockCacheKey) obj; if (block != other.block) return false; if (file != other.file) return false; + if (path == null) { + if (other.path != null) return false; + } else if (!path.equals(other.path)) return false; return true; } - + @Override public BlockCacheKey clone() { try { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java index 968628f058a..d2a124dda8c 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockCacheLocation.java @@ -19,6 +19,9 @@ package org.apache.solr.store.blockcache; import java.util.concurrent.atomic.AtomicBoolean; +/** + * @lucene.experimental + */ public class BlockCacheLocation { private int block; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java index 9982197a574..028fd55aecb 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectory.java @@ -34,6 +34,9 @@ import org.apache.solr.store.hdfs.HdfsDirectory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * @lucene.experimental + */ public class BlockDirectory extends Directory { public static Logger LOG = LoggerFactory.getLogger(BlockDirectory.class); @@ -82,11 +85,11 @@ public class BlockDirectory extends Directory { private Directory directory; private int blockSize; private String dirName; - private Cache cache; + private final Cache cache; private Set blockCacheFileTypes; private final boolean blockCacheReadEnabled; private final boolean blockCacheWriteEnabled; - + public BlockDirectory(String dirName, Directory directory, Cache cache, Set blockCacheFileTypes, boolean blockCacheReadEnabled, boolean blockCacheWriteEnabled) throws IOException { @@ -265,6 +268,15 @@ public class BlockDirectory extends Directory { return dirName + "/" + name; } + /** + * Expert: mostly for tests + * + * @lucene.experimental + */ + public Cache getCache() { + return cache; + } + @Override public void copy(Directory to, String src, String dest, IOContext context) throws IOException { @@ -383,4 +395,13 @@ public class BlockDirectory extends Directory { return directory; } + + public boolean isBlockCacheReadEnabled() { + return blockCacheReadEnabled; + } + + public boolean isBlockCacheWriteEnabled() { + return blockCacheWriteEnabled; + } + } diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java index 41ca9bb4775..592831b0dad 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockDirectoryCache.java @@ -21,17 +21,31 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +/** + * @lucene.experimental + */ public class BlockDirectoryCache implements Cache { - private BlockCache blockCache; + private final BlockCache blockCache; private AtomicInteger counter = new AtomicInteger(); private Map names = new ConcurrentHashMap(); + 
private String path; private Metrics metrics; - public BlockDirectoryCache(BlockCache blockCache, Metrics metrics) { + public BlockDirectoryCache(BlockCache blockCache, String path, Metrics metrics) { this.blockCache = blockCache; + this.path = path; this.metrics = metrics; } + /** + * Expert: mostly for tests + * + * @lucene.experimental + */ + public BlockCache getBlockCache() { + return blockCache; + } + @Override public void delete(String name) { names.remove(name); @@ -46,6 +60,7 @@ public class BlockDirectoryCache implements Cache { names.put(name, file); } BlockCacheKey blockCacheKey = new BlockCacheKey(); + blockCacheKey.setPath(path); blockCacheKey.setBlock(blockId); blockCacheKey.setFile(file); blockCache.store(blockCacheKey, blockOffset, buffer, offset, length); @@ -59,6 +74,7 @@ public class BlockDirectoryCache implements Cache { return false; } BlockCacheKey blockCacheKey = new BlockCacheKey(); + blockCacheKey.setPath(path); blockCacheKey.setBlock(blockId); blockCacheKey.setFile(file); boolean fetch = blockCache.fetch(blockCacheKey, b, blockOffset, off, diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java b/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java index e91ffb2ab4b..ba696506362 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BlockLocks.java @@ -21,6 +21,9 @@ import java.util.concurrent.atomic.AtomicLongArray; import org.apache.lucene.util.LongBitSet; +/** + * @lucene.experimental + */ public class BlockLocks { private AtomicLongArray bits; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java b/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java index 3e637d59d0b..f54b2757041 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/BufferStore.java @@ -22,7 +22,9 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; - +/** + * @lucene.experimental + */ public class BufferStore implements Store { private static final Store EMPTY = new Store() { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java b/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java index 7e70ad0a775..dafa4ffcd9d 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/Cache.java @@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache; * limitations under the License. */ +/** + * @lucene.experimental + */ public interface Cache { /** diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java b/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java index 6e3c92ee1ac..858214cf83b 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java @@ -21,10 +21,11 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; -/* +/** * Cache the blocks as they are written. The cache file name is the name of * the file until the file is closed, at which point the cache is updated * to include the last modified date (which is unknown until that point). 
+ * @lucene.experimental */ public class CachedIndexOutput extends ReusedBufferedIndexOutput { private final BlockDirectory directory; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java b/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java index be8f260b902..aa79fb99804 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/CustomBufferedIndexInput.java @@ -23,6 +23,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +/** + * @lucene.experimental + */ public abstract class CustomBufferedIndexInput extends IndexInput { public static final int BUFFER_SIZE = 32768; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java b/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java index fce1b9d9a73..052e70442f8 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/Metrics.java @@ -29,6 +29,9 @@ import org.apache.hadoop.metrics.MetricsUtil; import org.apache.hadoop.metrics.Updater; import org.apache.hadoop.metrics.jvm.JvmMetrics; +/** + * @lucene.experimental + */ public class Metrics implements Updater { public static class MethodCall { diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java b/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java index 6b12c982e44..92018fce7af 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java @@ -21,6 +21,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; +/** + * @lucene.experimental + */ public abstract class ReusedBufferedIndexOutput extends IndexOutput { public static final int BUFFER_SIZE = 1024; diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/Store.java b/solr/core/src/java/org/apache/solr/store/blockcache/Store.java index 3a491b3db83..8fb4e48cf38 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/Store.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/Store.java @@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache; * limitations under the License. 
*/ +/** + * @lucene.experimental + */ public interface Store { byte[] takeBuffer(int bufferSize); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java index 8a537935ea0..0294496c097 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileReader.java @@ -28,6 +28,9 @@ import org.apache.lucene.store.DataInput; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * @lucene.experimental + */ public class HdfsFileReader extends DataInput { public static Logger LOG = LoggerFactory.getLogger(HdfsFileReader.class); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java index 459a6d13fe9..d73e353a71e 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java @@ -32,6 +32,9 @@ import org.apache.lucene.store.DataOutput; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * @lucene.experimental + */ public class HdfsFileWriter extends DataOutput implements Closeable { public static Logger LOG = LoggerFactory.getLogger(HdfsFileWriter.class); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java b/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java index 044687c41d4..942dfd73f4f 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java @@ -21,6 +21,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; +/** + * @lucene.experimental + */ public class NullIndexOutput extends IndexOutput { private long pos; diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml index 22c5b3ff57b..95a57ab23dd 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml @@ -28,6 +28,7 @@ ${solr.hdfs.blockcache.blocksperbank:1024} ${solr.hdfs.home:} ${solr.hdfs.confdir:} + ${solr.hdfs.blockcache.global:false} ${solr.data.dir:} diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index d2413b09654..5fe25d7db76 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -46,7 +46,8 @@ 3000000 4000000 ${solr.hdfs.home:} - ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.global:false} ${tests.luceneMatchVersion:LUCENE_CURRENT} diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java index 8650f216431..ba0f0817843 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java @@ -131,7 +131,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase int threadCount = 1; int i = 0; for (i = 0; i < threadCount; i++) { - StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true); + StopableIndexingThread indexThread = new 
StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true); threads.add(indexThread); indexThread.start(); } @@ -270,7 +270,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase public FullThrottleStopableIndexingThread(List clients, String id, boolean doDeletes) { - super(id, doDeletes); + super(controlClient, cloudClient, id, doDeletes); setName("FullThrottleStopableIndexingThread"); setDaemon(true); this.clients = clients; diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java index 98353476ff0..19e40bfd2db 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java @@ -108,7 +108,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase { List threads = new ArrayList(); int threadCount = 2; for (int i = 0; i < threadCount; i++) { - StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true); + StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true); threads.add(indexThread); indexThread.start(); } diff --git a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java index 6bb1328cfbd..a15a021706a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java @@ -66,10 +66,10 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase { int maxDoc = maxDocList[random().nextInt(maxDocList.length - 1)]; - indexThread = new StopableIndexingThread("1", true, maxDoc); + indexThread = new StopableIndexingThread(controlClient, cloudClient, "1", true, maxDoc); indexThread.start(); - indexThread2 = new StopableIndexingThread("2", true, maxDoc); + indexThread2 = new StopableIndexingThread(controlClient, cloudClient, "2", true, maxDoc); indexThread2.start(); @@ -100,7 +100,7 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase { Thread.sleep(1000); - waitForThingsToLevelOut(45); + waitForThingsToLevelOut(90); Thread.sleep(2000); diff --git a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java index 1788aa715ad..6dae9b555ce 100644 --- a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java +++ b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java @@ -64,6 +64,8 @@ public class HdfsTestUtil { System.setProperty("solr.hdfs.home", "/solr_hdfs_home"); + System.setProperty("solr.hdfs.blockcache.global", Boolean.toString(LuceneTestCase.random().nextBoolean())); + final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null); dfsCluster.waitActive(); @@ -92,6 +94,7 @@ public class HdfsTestUtil { System.clearProperty("test.build.data"); System.clearProperty("test.cache.data"); System.clearProperty("solr.hdfs.home"); + System.clearProperty("solr.hdfs.blockcache.global"); if (dfsCluster != null) { timers.remove(dfsCluster); dfsCluster.shutdown(); diff --git a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java new file mode 100644 index 00000000000..5a737826566 --- /dev/null +++ 
b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud.hdfs; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.NRTCachingDirectory; +import org.apache.lucene.util.LuceneTestCase.Nightly; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.cloud.BasicDistributedZkTest; +import org.apache.solr.cloud.StopableIndexingThread; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.HdfsDirectoryFactory; +import org.apache.solr.core.SolrCore; +import org.apache.solr.servlet.SolrDispatchFilter; +import org.apache.solr.store.blockcache.BlockCache; +import org.apache.solr.store.blockcache.BlockDirectory; +import org.apache.solr.store.blockcache.BlockDirectoryCache; +import org.apache.solr.store.blockcache.Cache; +import org.apache.solr.util.RefCounted; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; + +@Slow +@Nightly +@ThreadLeakScope(Scope.NONE) // hdfs client currently leaks thread(s) +public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest { + private static final String SOLR_HDFS_HOME = "solr.hdfs.home"; + private static final String SOLR_HDFS_BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global"; + private static final String ACOLLECTION = "acollection"; + private static MiniDFSCluster dfsCluster; + + @BeforeClass + public static void setupClass() throws Exception { + schemaString = "schema15.xml"; // we need a string id + dfsCluster = HdfsTestUtil.setupClass(new File(TEMP_DIR, + HdfsBasicDistributedZk2Test.class.getName() + "_" + + System.currentTimeMillis()).getAbsolutePath()); + System.setProperty(SOLR_HDFS_HOME, dfsCluster.getURI().toString() + "/solr"); + } + + @AfterClass + public static void teardownClass() throws Exception { + HdfsTestUtil.teardownClass(dfsCluster); + System.clearProperty(SOLR_HDFS_HOME); + dfsCluster = null; + } + + @Override + protected String getDataDir(String dataDir) throws IOException { + return HdfsTestUtil.getDataDir(dfsCluster, dataDir); + } + + public HdfsWriteToMultipleCollectionsTest() { + super(); + sliceCount = 1; + shardCount = 3; + } + + protected String getSolrXml() { + return 
"solr-no-core.xml"; + } + + @Override + public void doTest() throws Exception { + int docCount = random().nextInt(1313) + 1; + int cnt = random().nextInt(4) + 1; + for (int i = 0; i < cnt; i++) { + createCollection(ACOLLECTION + i, 2, 2, 9); + } + for (int i = 0; i < cnt; i++) { + waitForRecoveriesToFinish(ACOLLECTION + i, false); + } + List cloudServers = new ArrayList(); + List threads = new ArrayList(); + for (int i = 0; i < cnt; i++) { + CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress()); + server.setDefaultCollection(ACOLLECTION + i); + cloudServers.add(server); + StopableIndexingThread indexThread = new StopableIndexingThread(null, server, "1", true, docCount); + threads.add(indexThread); + indexThread.start(); + } + + int addCnt = 0; + for (StopableIndexingThread thread : threads) { + thread.join(); + addCnt += thread.getNumAdds() - thread.getNumDeletes(); + } + + long collectionsCount = 0; + for (CloudSolrServer server : cloudServers) { + server.commit(); + collectionsCount += server.query(new SolrQuery("*:*")).getResults().getNumFound(); + } + + for (CloudSolrServer server : cloudServers) { + server.shutdown(); + } + + assertEquals(addCnt, collectionsCount); + + BlockCache lastBlockCache = null; + // assert that we are using the block directory and that write and read caching are being used + for (JettySolrRunner jetty : jettys) { + CoreContainer cores = ((SolrDispatchFilter) jetty.getDispatchFilter() + .getFilter()).getCores(); + Collection solrCores = cores.getCores(); + for (SolrCore core : solrCores) { + if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName() + .startsWith(ACOLLECTION)) { + assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory); + RefCounted iwRef = core.getUpdateHandler() + .getSolrCoreState().getIndexWriter(core); + try { + IndexWriter iw = iwRef.get(); + NRTCachingDirectory directory = (NRTCachingDirectory) iw + .getDirectory(); + BlockDirectory blockDirectory = (BlockDirectory) directory + .getDelegate(); + assertTrue(blockDirectory.isBlockCacheReadEnabled()); + assertTrue(blockDirectory.isBlockCacheWriteEnabled()); + Cache cache = blockDirectory.getCache(); + // we know its a BlockDirectoryCache, but future proof + assertTrue(cache instanceof BlockDirectoryCache); + BlockCache blockCache = ((BlockDirectoryCache) cache) + .getBlockCache(); + if (lastBlockCache != null) { + if (Boolean.getBoolean(SOLR_HDFS_BLOCKCACHE_GLOBAL)) { + assertEquals(lastBlockCache, blockCache); + } else { + assertNotSame(lastBlockCache, blockCache); + } + } + lastBlockCache = blockCache; + } finally { + iwRef.decref(); + } + } + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java b/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java index 70fd813aead..bc5e75c844a 100644 --- a/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java +++ b/solr/core/src/test/org/apache/solr/store/blockcache/BlockCacheTest.java @@ -51,6 +51,7 @@ public class BlockCacheTest extends LuceneTestCase { int file = 0; blockCacheKey.setBlock(block); blockCacheKey.setFile(file); + blockCacheKey.setPath("/"); if (blockCache.fetch(blockCacheKey, buffer)) { hitsInCache.incrementAndGet(); @@ -91,6 +92,7 @@ public class BlockCacheTest extends LuceneTestCase { BlockCacheKey blockCacheKey = new BlockCacheKey(); blockCacheKey.setBlock(0); blockCacheKey.setFile(0); + blockCacheKey.setPath("/"); byte[] newData = new byte[blockSize*3]; byte[] testData = testData(random, blockSize, newData); 
diff --git a/solr/example/solr/collection1/conf/solrconfig.xml b/solr/example/solr/collection1/conf/solrconfig.xml index 3126c21d74d..192cbcf24bc 100755 --- a/solr/example/solr/collection1/conf/solrconfig.xml +++ b/solr/example/solr/collection1/conf/solrconfig.xml @@ -129,6 +129,9 @@ ${solr.hdfs.confdir:} ${solr.hdfs.blockcache.enabled:true} + + ${solr.hdfs.blockcache.global:true} diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java index e3f193bbe71..66a3adf468a 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java @@ -1428,122 +1428,13 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes return rsp; } - abstract class StopableThread extends Thread { + static abstract class StopableThread extends Thread { public StopableThread(String name) { super(name); } public abstract void safeStop(); } - class StopableIndexingThread extends StopableThread { - private volatile boolean stop = false; - protected final String id; - protected final List deletes = new ArrayList(); - protected Set addFails = new HashSet(); - protected Set deleteFails = new HashSet(); - protected boolean doDeletes; - private int numCycles; - - public StopableIndexingThread(String id, boolean doDeletes) { - this(id, doDeletes, -1); - } - - public StopableIndexingThread(String id, boolean doDeletes, int numCycles) { - super("StopableIndexingThread"); - this.id = id; - this.doDeletes = doDeletes; - this.numCycles = numCycles; - setDaemon(true); - } - - @Override - public void run() { - int i = 0; - int numDone = 0; - int numDeletes = 0; - int numAdds = 0; - - while (true && !stop) { - if (numCycles != -1) { - if (numDone > numCycles) { - break; - } - } - ++numDone; - String id = this.id + "-" + i; - ++i; - boolean addFailed = false; - - if (doDeletes && random().nextBoolean() && deletes.size() > 0) { - String delete = deletes.remove(0); - try { - numDeletes++; - UpdateRequest req = new UpdateRequest(); - req.deleteById(delete); - req.setParam("CONTROL", "TRUE"); - req.process(controlClient); - - cloudClient.deleteById(delete); - } catch (Exception e) { - System.err.println("REQUEST FAILED:"); - e.printStackTrace(); - if (e instanceof SolrServerException) { - System.err.println("ROOT CAUSE:"); - ((SolrServerException) e).getRootCause().printStackTrace(); - } - deleteFails.add(id); - } - } - - try { - numAdds++; - indexr("id", id, i1, 50, t1, - "to come to the aid of their country."); - } catch (Exception e) { - addFailed = true; - System.err.println("REQUEST FAILED:"); - e.printStackTrace(); - if (e instanceof SolrServerException) { - System.err.println("ROOT CAUSE:"); - ((SolrServerException) e).getRootCause().printStackTrace(); - } - addFails.add(id); - } - - if (!addFailed && doDeletes && random().nextBoolean()) { - deletes.add(id); - } - - try { - Thread.currentThread().sleep(random().nextInt(100)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails" - + " deletes:" + numDeletes); - } - - @Override - public void safeStop() { - stop = true; - } - - public Set getAddFails() { - return addFails; - } - - public Set getDeleteFails() { - return deleteFails; - } - - public int getFailCount() { - 
return addFails.size() + deleteFails.size(); - } - - }; - class StopableSearchThread extends StopableThread { private volatile boolean stop = false; protected final AtomicInteger queryFails = new AtomicInteger(); diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java b/solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java new file mode 100644 index 00000000000..8446f086849 --- /dev/null +++ b/solr/test-framework/src/java/org/apache/solr/cloud/StopableIndexingThread.java @@ -0,0 +1,185 @@ +package org.apache.solr.cloud; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.common.SolrInputDocument; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class StopableIndexingThread extends AbstractFullDistribZkTestBase.StopableThread { + private static String t1 = "a_t"; + private static String i1 = "a_si"; + private volatile boolean stop = false; + protected final String id; + protected final List deletes = new ArrayList(); + protected Set addFails = new HashSet(); + protected Set deleteFails = new HashSet(); + protected boolean doDeletes; + private int numCycles; + private SolrServer controlClient; + private SolrServer cloudClient; + private int numDeletes; + private int numAdds; + + public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes) { + this(controlClient, cloudClient, id, doDeletes, -1); + } + + public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes, int numCycles) { + super("StopableIndexingThread"); + this.controlClient = controlClient; + this.cloudClient = cloudClient; + this.id = id; + this.doDeletes = doDeletes; + this.numCycles = numCycles; + setDaemon(true); + } + + @Override + public void run() { + int i = 0; + int numDone = 0; + numDeletes = 0; + numAdds = 0; + + while (true && !stop) { + if (numCycles != -1) { + if (numDone > numCycles) { + break; + } + } + ++numDone; + String id = this.id + "-" + i; + ++i; + boolean addFailed = false; + + if (doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean() && deletes.size() > 0) { + String delete = deletes.remove(0); + try { + numDeletes++; + if (controlClient != null) { + UpdateRequest req = new UpdateRequest(); + req.deleteById(delete); + req.setParam("CONTROL", "TRUE"); + req.process(controlClient); + } + + cloudClient.deleteById(delete); + } catch (Exception e) { + System.err.println("REQUEST FAILED:"); + e.printStackTrace(); + 
if (e instanceof SolrServerException) { + System.err.println("ROOT CAUSE:"); + ((SolrServerException) e).getRootCause().printStackTrace(); + } + deleteFails.add(id); + } + } + + try { + numAdds++; + indexr("id", id, i1, 50, t1, + "to come to the aid of their country."); + } catch (Exception e) { + addFailed = true; + System.err.println("REQUEST FAILED:"); + e.printStackTrace(); + if (e instanceof SolrServerException) { + System.err.println("ROOT CAUSE:"); + ((SolrServerException) e).getRootCause().printStackTrace(); + } + addFails.add(id); + } + + if (!addFailed && doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean()) { + deletes.add(id); + } + + try { + Thread.currentThread().sleep(AbstractFullDistribZkTestBase.random().nextInt(100)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails" + + " deletes:" + numDeletes); + } + + @Override + public void safeStop() { + stop = true; + } + + public Set getAddFails() { + return addFails; + } + + public Set getDeleteFails() { + return deleteFails; + } + + public int getFailCount() { + return addFails.size() + deleteFails.size(); + } + + protected void addFields(SolrInputDocument doc, Object... fields) { + for (int i = 0; i < fields.length; i += 2) { + doc.addField((String) (fields[i]), fields[i + 1]); + } + } + + protected void indexr(Object... fields) throws Exception { + SolrInputDocument doc = new SolrInputDocument(); + addFields(doc, fields); + addFields(doc, "rnd_b", true); + indexDoc(doc); + } + + protected void indexDoc(SolrInputDocument doc) throws IOException, + SolrServerException { + + if (controlClient != null) { + UpdateRequest req = new UpdateRequest(); + req.add(doc); + req.setParam("CONTROL", "TRUE"); + req.process(controlClient); + } + + + UpdateRequest ureq = new UpdateRequest(); + ureq.add(doc); + ureq.process(cloudClient); + } + + public int getNumDeletes() { + return numDeletes; + } + + public int getNumAdds() { + return numAdds; + } + +} \ No newline at end of file From 3064419624fd10622da2844399d4009fc2e62e00 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 4 Mar 2014 17:04:48 +0000 Subject: [PATCH 02/38] LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574135 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/analysis/hunspell/Dictionary.java | 171 +++++++++++++++++- .../lucene/analysis/hunspell/Stemmer.java | 45 ++++- .../lucene/analysis/hunspell/TestConv.java | 36 ++++ .../analysis/hunspell/TestDictionary.java | 57 +++++- .../hunspell/TestHunspellStemFilter.java | 18 +- .../lucene/analysis/hunspell/TestIgnore.java | 36 ++++ .../apache/lucene/analysis/hunspell/conv.aff | 16 ++ .../apache/lucene/analysis/hunspell/conv.dic | 2 + .../lucene/analysis/hunspell/ignore.aff | 6 + .../lucene/analysis/hunspell/ignore.dic | 3 + 11 files changed, 373 insertions(+), 20 deletions(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff 
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 30774b5b579..cca653dc97a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -89,6 +89,9 @@ New Features * LUCENE-5485: Add circumfix support to HunspellStemFilter. (Robert Muir) +* LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter. + (Robert Muir) + API Changes * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 5242f5c77a5..1d3e60b970c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -21,14 +21,17 @@ import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.OfflineSorter; import org.apache.lucene.util.OfflineSorter.ByteSequencesReader; import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter; import org.apache.lucene.util.fst.Builder; +import org.apache.lucene.util.fst.CharSequenceOutputs; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.IntSequenceOutputs; +import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.Util; import java.io.BufferedInputStream; @@ -67,6 +70,9 @@ public class Dictionary { private static final String FLAG_KEY = "FLAG"; private static final String COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES"; private static final String CIRCUMFIX_KEY = "CIRCUMFIX"; + private static final String IGNORE_KEY = "IGNORE"; + private static final String ICONV_KEY = "ICONV"; + private static final String OCONV_KEY = "OCONV"; private static final String NUM_FLAG_TYPE = "num"; private static final String UTF8_FLAG_TYPE = "UTF-8"; @@ -110,6 +116,16 @@ public class Dictionary { int circumfix = -1; // circumfix flag, or -1 if one is not defined + // ignored characters (dictionary, affix, inputs) + private char[] ignore; + + // FSTs used for ICONV/OCONV, output ord pointing to replacement text + FST iconv; + FST oconv; + + boolean needsInputCleaning; + boolean needsOutputCleaning; + /** * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix * and dictionary files. 
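The hunk below adds parsing for the new IGNORE, ICONV, and OCONV directives. For orientation, these declarations in a Hunspell affix file look roughly like the following (values invented for illustration; the conv.aff/ignore.aff test fixtures added by this patch are not shown here):

    IGNORE aeiou
    ICONV 2
    ICONV A a
    ICONV B b

IGNORE names characters to strip from input, and the ICONV/OCONV header line carries the number of conversion pairs that follow — the num value read by parseConversions below.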
@@ -136,6 +152,8 @@ public class Dictionary { */ public Dictionary(InputStream affix, List dictionaries, boolean ignoreCase) throws IOException, ParseException { this.ignoreCase = ignoreCase; + this.needsInputCleaning = ignoreCase; + this.needsOutputCleaning = false; // set if we have an OCONV // hungarian has thousands of AF before the SET, so a 32k buffer is needed BufferedInputStream buffered = new BufferedInputStream(affix, 32768); buffered.mark(32768); @@ -249,6 +267,29 @@ public class Dictionary { throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber()); } circumfix = flagParsingStrategy.parseFlag(parts[1]); + } else if (line.startsWith(IGNORE_KEY)) { + String parts[] = line.split("\\s+"); + if (parts.length != 2) { + throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber()); + } + ignore = parts[1].toCharArray(); + Arrays.sort(ignore); + needsInputCleaning = true; + } else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) { + String parts[] = line.split("\\s+"); + String type = parts[0]; + if (parts.length != 2) { + throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber()); + } + int num = Integer.parseInt(parts[1]); + FST res = parseConversions(reader, num); + if (type.equals("ICONV")) { + iconv = res; + needsInputCleaning |= iconv != null; + } else { + oconv = res; + needsOutputCleaning |= oconv != null; + } } } @@ -291,6 +332,7 @@ public class Dictionary { Map seenPatterns) throws IOException, ParseException { BytesRef scratch = new BytesRef(); + StringBuilder sb = new StringBuilder(); String args[] = header.split("\\s+"); boolean crossProduct = args[2].equals("Y"); @@ -300,9 +342,6 @@ public class Dictionary { ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3); for (int i = 0; i < numLines; i++) { - if (currentAffix > Short.MAX_VALUE) { - throw new UnsupportedOperationException("Too many affixes, please report this to dev@lucene.apache.org"); - } assert affixWriter.getPosition() == currentAffix << 3; String line = reader.readLine(); String ruleArgs[] = line.split("\\s+"); @@ -345,6 +384,9 @@ public class Dictionary { Integer patternIndex = seenPatterns.get(regex); if (patternIndex == null) { patternIndex = patterns.size(); + if (patternIndex > Short.MAX_VALUE) { + throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org"); + } seenPatterns.put(regex, patternIndex); Pattern pattern = Pattern.compile(regex); patterns.add(pattern); @@ -355,6 +397,8 @@ public class Dictionary { if (stripOrd < 0) { // already exists in our hash stripOrd = (-stripOrd)-1; + } else if (stripOrd > Character.MAX_VALUE) { + throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org"); } if (appendFlags == null) { @@ -368,7 +412,7 @@ public class Dictionary { appendFlagsOrd = (-appendFlagsOrd)-1; } else if (appendFlagsOrd > Short.MAX_VALUE) { // this limit is probably flexible, but its a good sanity check too - throw new UnsupportedOperationException("Too many unique flags, please report this to dev@lucene.apache.org"); + throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org"); } affixWriter.writeShort((short)flag); @@ -378,6 +422,11 @@ public class Dictionary { affixWriter.writeShort((short)patternOrd); affixWriter.writeShort((short)appendFlagsOrd); + if (needsInputCleaning) { + CharSequence cleaned = 
cleanInput(affixArg, sb); + affixArg = cleaned.toString(); + } + List list = affixes.get(affixArg); if (list == null) { list = new ArrayList(); @@ -388,6 +437,31 @@ public class Dictionary { currentAffix++; } } + + private FST parseConversions(LineNumberReader reader, int num) throws IOException, ParseException { + Map mappings = new TreeMap<>(); + + for (int i = 0; i < num; i++) { + String line = reader.readLine(); + String parts[] = line.split("\\s+"); + if (parts.length != 3) { + throw new ParseException("invalid syntax: " + line, reader.getLineNumber()); + } + if (mappings.put(parts[1], parts[2]) != null) { + throw new IllegalStateException("duplicate mapping specified for: " + parts[1]); + } + } + + Outputs outputs = CharSequenceOutputs.getSingleton(); + Builder builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs); + IntsRef scratchInts = new IntsRef(); + for (Map.Entry entry : mappings.entrySet()) { + Util.toUTF16(entry.getKey(), scratchInts); + builder.add(scratchInts, new CharsRef(entry.getValue())); + } + + return builder.finish(); + } /** * Parses the encoding specified in the affix file readable through the provided InputStream @@ -485,6 +559,8 @@ public class Dictionary { BytesRef flagsScratch = new BytesRef(); IntsRef scratchInts = new IntsRef(); + StringBuilder sb = new StringBuilder(); + File unsorted = File.createTempFile("unsorted", "dat", tempDir); try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) { for (InputStream dictionary : dictionaries) { @@ -492,16 +568,19 @@ public class Dictionary { String line = lines.readLine(); // first line is number of entries (approximately, sometimes) while ((line = lines.readLine()) != null) { - if (ignoreCase) { + if (needsInputCleaning) { int flagSep = line.lastIndexOf('/'); if (flagSep == -1) { - writer.write(line.toLowerCase(Locale.ROOT).getBytes(IOUtils.CHARSET_UTF_8)); + CharSequence cleansed = cleanInput(line, sb); + writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8)); } else { - StringBuilder sb = new StringBuilder(); - sb.append(line.substring(0, flagSep).toLowerCase(Locale.ROOT)); - if (flagSep < line.length()) { - sb.append(line.substring(flagSep, line.length())); + String text = line.substring(0, flagSep); + CharSequence cleansed = cleanInput(text, sb); + if (cleansed != sb) { + sb.setLength(0); + sb.append(cleansed); } + sb.append(line.substring(flagSep)); writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8)); } } else { @@ -761,4 +840,76 @@ public class Dictionary { static boolean hasFlag(char flags[], char flag) { return Arrays.binarySearch(flags, flag) >= 0; } + + CharSequence cleanInput(CharSequence input, StringBuilder reuse) { + reuse.setLength(0); + + for (int i = 0; i < input.length(); i++) { + char ch = input.charAt(i); + + if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0) { + continue; + } + + if (ignoreCase && iconv == null) { + // if we have no input conversion mappings, do this on-the-fly + ch = Character.toLowerCase(ch); + } + + reuse.append(ch); + } + + if (iconv != null) { + try { + applyMappings(iconv, reuse); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + if (ignoreCase) { + for (int i = 0; i < reuse.length(); i++) { + reuse.setCharAt(i, Character.toLowerCase(reuse.charAt(i))); + } + } + } + + return reuse; + } + + // TODO: this could be more efficient! 
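+ // Greedy longest-match rewrite: from each position, follow the conversion FST
+ // as far as the input allows, remember the longest key that ended in a final
+ // state, splice in that key's output, then resume scanning just past the
+ // replacement; shorter matches and unmatched characters are left as-is.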
+ static void applyMappings(FST fst, StringBuilder sb) throws IOException { + final FST.BytesReader bytesReader = fst.getBytesReader(); + final FST.Arc firstArc = fst.getFirstArc(new FST.Arc()); + final CharsRef NO_OUTPUT = fst.outputs.getNoOutput(); + + // temporary stuff + final FST.Arc arc = new FST.Arc<>(); + int longestMatch; + CharsRef longestOutput; + + for (int i = 0; i < sb.length(); i++) { + arc.copyFrom(firstArc); + CharsRef output = NO_OUTPUT; + longestMatch = -1; + longestOutput = null; + + for (int j = i; j < sb.length(); j++) { + char ch = sb.charAt(j); + if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { + break; + } else { + output = fst.outputs.add(output, arc.output); + } + if (arc.isFinal()) { + longestOutput = fst.outputs.add(output, arc.nextFinalOutput); + longestMatch = j; + } + } + + if (longestMatch >= 0) { + sb.delete(i, longestMatch+1); + sb.insert(i, longestOutput); + i += (longestOutput.length - 1); + } + } + } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index 18e62c597e4..ff6cc0ae802 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell; * limitations under the License. */ +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -24,8 +25,8 @@ import java.util.List; import java.util.regex.Pattern; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.analysis.util.CharacterUtils; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IntsRef; @@ -40,8 +41,11 @@ final class Stemmer { private final BytesRef scratch = new BytesRef(); private final StringBuilder segment = new StringBuilder(); private final ByteArrayDataInput affixReader; - private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT); - + + // used for normalization + private final StringBuilder scratchSegment = new StringBuilder(); + private char scratchBuffer[] = new char[32]; + /** * Constructs a new Stemmer which will use the provided Dictionary to create its stems. * @@ -68,17 +72,25 @@ final class Stemmer { * @param word Word to find the stems for * @return List of stems for the word */ - public List stem(char word[], int length) { - if (dictionary.ignoreCase) { - charUtils.toLowerCase(word, 0, length); + public List stem(char word[], int length) { + + if (dictionary.needsInputCleaning) { + scratchSegment.setLength(0); + scratchSegment.append(word, 0, length); + CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment); + scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length()); + length = segment.length(); + segment.getChars(0, length, scratchBuffer, 0); + word = scratchBuffer; } + List stems = new ArrayList(); IntsRef forms = dictionary.lookupWord(word, 0, length); if (forms != null) { // TODO: some forms should not be added, e.g. ONLYINCOMPOUND // just because it exists, does not make it valid... 
for (int i = 0; i < forms.length; i++) { - stems.add(new CharsRef(word, 0, length)); + stems.add(newStem(word, length)); } } stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false)); @@ -106,6 +118,23 @@ final class Stemmer { } return deduped; } + + private CharsRef newStem(char buffer[], int length) { + if (dictionary.needsOutputCleaning) { + scratchSegment.setLength(0); + scratchSegment.append(buffer, 0, length); + try { + Dictionary.applyMappings(dictionary.oconv, scratchSegment); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + char cleaned[] = new char[scratchSegment.length()]; + scratchSegment.getChars(0, cleaned.length, cleaned, 0); + return new CharsRef(cleaned, 0, cleaned.length); + } else { + return new CharsRef(buffer, 0, length); + } + } // ================================================= Helper Methods ================================================ @@ -292,7 +321,7 @@ final class Stemmer { continue; } } - stems.add(new CharsRef(strippedWord, 0, length)); + stems.add(newStem(strippedWord, length)); } } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java new file mode 100644 index 00000000000..c72fd3ff704 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestConv.java @@ -0,0 +1,36 @@ +package org.apache.lucene.analysis.hunspell; + +import org.junit.BeforeClass; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestConv extends StemmerTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + init("conv.aff", "conv.dic"); + } + + public void testConversion() { + assertStemsTo("drink", "drInk"); + assertStemsTo("drInk", "drInk"); + assertStemsTo("drInkAble", "drInk"); + assertStemsTo("drInkABle", "drInk"); + assertStemsTo("drinkABle", "drInk"); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index ad4f257e628..5d7682e88c0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -22,10 +22,15 @@ import java.io.IOException; import java.io.InputStream; import java.text.ParseException; -import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.fst.Builder; +import org.apache.lucene.util.fst.CharSequenceOutputs; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.Outputs; +import org.apache.lucene.util.fst.Util; public class TestDictionary extends LuceneTestCase { @@ -123,4 +128,54 @@ public class TestDictionary extends LuceneTestCase { assertTrue(affixStream.isClosed()); assertTrue(dictStream.isClosed()); } + + + + public void testReplacements() throws Exception { + Outputs outputs = CharSequenceOutputs.getSingleton(); + Builder builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs); + IntsRef scratchInts = new IntsRef(); + + // a -> b + Util.toUTF16("a", scratchInts); + builder.add(scratchInts, new CharsRef("b")); + + // ab -> c + Util.toUTF16("ab", scratchInts); + builder.add(scratchInts, new CharsRef("c")); + + // c -> de + Util.toUTF16("c", scratchInts); + builder.add(scratchInts, new CharsRef("de")); + + // def -> gh + Util.toUTF16("def", scratchInts); + builder.add(scratchInts, new CharsRef("gh")); + + FST fst = builder.finish(); + + StringBuilder sb = new StringBuilder("atestanother"); + Dictionary.applyMappings(fst, sb); + assertEquals("btestbnother", sb.toString()); + + sb = new StringBuilder("abtestanother"); + Dictionary.applyMappings(fst, sb); + assertEquals("ctestbnother", sb.toString()); + + sb = new StringBuilder("atestabnother"); + Dictionary.applyMappings(fst, sb); + assertEquals("btestcnother", sb.toString()); + + sb = new StringBuilder("abtestabnother"); + Dictionary.applyMappings(fst, sb); + assertEquals("ctestcnother", sb.toString()); + + sb = new StringBuilder("abtestabcnother"); + Dictionary.applyMappings(fst, sb); + assertEquals("ctestcdenother", sb.toString()); + + sb = new StringBuilder("defdefdefc"); + Dictionary.applyMappings(fst, sb); + assertEquals("ghghghde", sb.toString()); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java index 3069c0ab1e2..f42afcfa9cc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell; import java.io.IOException; import 
java.io.InputStream; import java.util.Arrays; +import java.util.Collections; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -30,7 +31,6 @@ import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.analysis.hunspell.HunspellStemFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -94,4 +94,20 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase { }; checkOneTerm(a, "", ""); } + + public void testIgnoreCaseNoSideEffects() throws Exception { + final Dictionary d; + try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff"); + InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) { + d = new Dictionary(affixStream, Collections.singletonList(dictStream), true); + } + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d)); + } + }; + checkOneTerm(a, "NoChAnGy", "NoChAnGy"); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java new file mode 100644 index 00000000000..723eca94d3d --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestIgnore.java @@ -0,0 +1,36 @@ +package org.apache.lucene.analysis.hunspell; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.junit.BeforeClass; + +public class TestIgnore extends StemmerTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + init("ignore.aff", "ignore.dic"); + } + + public void testExamples() { + assertStemsTo("drink", "drink"); + assertStemsTo("drinkable", "drink"); + assertStemsTo("dr'ink-able", "drink"); + assertStemsTo("drank-able", "drank"); + assertStemsTo("'-'-'-"); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff new file mode 100644 index 00000000000..e860a87b7e9 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.aff @@ -0,0 +1,16 @@ +SET UTF-8 + +ICONV 4 +ICONV A a +ICONV B b +ICONV C c +ICONV I i + +OCONV 4 +OCONV a A +OCONV b B +OCONV c C +OCONV i I + +SFX X Y 1 +SFX X 0 able . 
+ABLE \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic new file mode 100644 index 00000000000..6b68dc80cc0 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/conv.dic @@ -0,0 +1,2 @@ +1 +drink/X [VERB] diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff new file mode 100644 index 00000000000..65c4683fc0a --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.aff @@ -0,0 +1,6 @@ +SET UTF-8 + +IGNORE '- + +SFX X Y 1 +SFX X 0 able . +ABLE \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic new file mode 100644 index 00000000000..9ae92058f54 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ignore.dic @@ -0,0 +1,3 @@ +1 +drink/X [VERB] +dr-ank/X [VERB] \ No newline at end of file From b670831559cd4de9bbfecd1bdaf387cd52b6412c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 4 Mar 2014 17:51:20 +0000 Subject: [PATCH 03/38] SOLR-2934: increase buffer size for recent dictionaries with large amounts of AF/AM lines before charset git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574158 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/analysis/hunspell/Dictionary.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 1d3e60b970c..974d24185db 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -154,9 +154,11 @@ public class Dictionary { this.ignoreCase = ignoreCase; this.needsInputCleaning = ignoreCase; this.needsOutputCleaning = false; // set if we have an OCONV - // hungarian has thousands of AF before the SET, so a 32k buffer is needed - BufferedInputStream buffered = new BufferedInputStream(affix, 32768); - buffered.mark(32768); + // TODO: we really need to probably buffer this on disk since so many newer dictionaries + // (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare + // their encoding... but for now this large buffer is a workaround + BufferedInputStream buffered = new BufferedInputStream(affix, 65536); + buffered.mark(65536); String encoding = getDictionaryEncoding(buffered); buffered.reset(); CharsetDecoder decoder = getJavaEncoding(encoding); From 96bcbefdd403bba362389b927a5f6ef927122d51 Mon Sep 17 00:00:00 2001 From: "Chris M. 
Hostetter" Date: Wed, 5 Mar 2014 01:01:18 +0000 Subject: [PATCH 04/38] SOLR-5815: add some test logging to try and figure out WTF git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574273 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/core/TestNonNRTOpen.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java b/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java index 8a5e493a8cc..b3b851448bf 100644 --- a/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java +++ b/solr/core/src/test/org/apache/solr/core/TestNonNRTOpen.java @@ -30,8 +30,12 @@ import org.apache.solr.util.RefCounted; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class TestNonNRTOpen extends SolrTestCaseJ4 { - + private static final Logger log = LoggerFactory.getLogger(TestNonNRTOpen.class); + @BeforeClass public static void beforeClass() throws Exception { // use a filesystem, because we need to create an index, then "start up solr" @@ -80,6 +84,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 { // core reload String core = h.getCore().getName(); + log.info("Reloading core: " + h.getCore().toString()); h.getCoreContainer().reload(core); assertNotNRT(1); @@ -90,6 +95,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 { // add a doc and core reload assertU(adoc("bazz", "doc2")); + log.info("Reloading core: " + h.getCore().toString()); h.getCoreContainer().reload(core); assertNotNRT(3); } @@ -127,11 +133,15 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 { } static void assertNotNRT(int maxDoc) { - RefCounted searcher = h.getCore().getSearcher(); + SolrCore core = h.getCore(); + log.info("Checking notNRT & maxDoc=" + maxDoc + " of core=" + core.toString()); + RefCounted searcher = core.getSearcher(); try { - DirectoryReader ir = searcher.get().getIndexReader(); - assertEquals(maxDoc, ir.maxDoc()); - assertFalse("expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt")); + SolrIndexSearcher s = searcher.get(); + DirectoryReader ir = s.getIndexReader(); + assertEquals("SOLR-5815? : wrong maxDoc: core=" + core.toString() +" searcher=" + s.toString(), + maxDoc, ir.maxDoc()); + assertFalse("SOLR-5815? : expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt")); } finally { searcher.decref(); } From 9f701bd30476bafac8d6ec2b5505fb1886541219 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 01:33:35 +0000 Subject: [PATCH 05/38] SOLR-5811: The Overseer will retry work items until success, which is a serious problem if you hit a bad work item. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574280 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../java/org/apache/solr/cloud/Overseer.java | 45 +++- .../org/apache/solr/cloud/ZkController.java | 6 + .../org/apache/solr/cloud/OverseerTest.java | 193 ++++++++++++------ 4 files changed, 178 insertions(+), 69 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 04430677f41..b9b572cc005 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -116,6 +116,9 @@ Bug Fixes * SOLR-5761: HttpSolrServer has a few fields that can be set via setters but are not volatile. (Mark Miller, Gregory Chanan) +* SOLR-5811: The Overseer will retry work items until success, which is a serious + problem if you hit a bad work item. 
(Mark Miller) + Optimizations ---------------------- * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 666c7134d00..8bf202ce8ec 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -123,7 +123,16 @@ public class Overseer { else if (LeaderStatus.YES == isLeader) { final ZkNodeProps message = ZkNodeProps.load(head); final String operation = message.getStr(QUEUE_OPERATION); - clusterState = processMessage(clusterState, message, operation); + try { + clusterState = processMessage(clusterState, message, operation); + } catch (Exception e) { + // generally there is nothing we can do - in most cases, we have + // an issue that will fail again on retry or we cannot communicate with + // ZooKeeper in which case another Overseer should take over + // TODO: if ordering for the message is not important, we could + // track retries and put it back on the end of the queue + log.error("Could not process Overseer message", e); + } zkClient.setData(ZkStateReader.CLUSTER_STATE, ZkStateReader.toJSON(clusterState), true); @@ -189,8 +198,16 @@ public class Overseer { while (head != null) { final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); final String operation = message.getStr(QUEUE_OPERATION); - - clusterState = processMessage(clusterState, message, operation); + try { + clusterState = processMessage(clusterState, message, operation); + } catch (Exception e) { + // generally there is nothing we can do - in most cases, we have + // an issue that will fail again on retry or we cannot communicate with + // ZooKeeper in which case another Overseer should take over + // TODO: if ordering for the message is not important, we could + // track retries and put it back on the end of the queue + log.error("Could not process Overseer message", e); + } workQueue.offer(head.getBytes()); stateUpdateQueue.poll(); @@ -294,6 +311,7 @@ public class Overseer { private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) { log.info("createReplica() {} ", message); String coll = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, coll); String slice = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice sl = clusterState.getSlice(coll, slice); if(sl == null){ @@ -334,6 +352,7 @@ public class Overseer { private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); log.info("Update shard state invoked for collection: " + collection + " with message: " + message); for (String key : message.keySet()) { if (ZkStateReader.COLLECTION_PROP.equals(key)) continue; @@ -358,6 +377,7 @@ public class Overseer { private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKey = message.getStr("routeKey"); String range = message.getStr("range"); @@ -397,8 +417,15 @@ public class Overseer { return clusterState; } + private void checkCollection(ZkNodeProps message, String collection) { + if (collection == null || collection.trim().length() == 0) { + log.error("Skipping invalid Overseer message because it has no collection specified: " 
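// Editor's annotation: as committed here, checkCollection() only logs -- the
// message is not actually skipped, so the "Skipping" wording is ahead of the
// behavior. PATCH 07 below ("SOLR-5811: Additional cleanup.") replaces it with
// a boolean checkCollectionKeyExistence() that each handler uses to return the
// ClusterState unchanged when the collection key is missing.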
+ message); + } + } + private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKeyStr = message.getStr("routeKey"); @@ -424,6 +451,7 @@ public class Overseer { private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice slice = clusterState.getSlice(collection, shardId); if (slice == null) { @@ -470,6 +498,7 @@ public class Overseer { private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); if(collection==null || sliceName == null){ @@ -490,9 +519,7 @@ public class Overseer { */ private ClusterState updateState(ClusterState state, final ZkNodeProps message) { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - assert collection.length() > 0 : message; - - + checkCollection(message, collection); Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null); log.info("Update state numShards={} message={}", numShards, message); @@ -851,9 +878,7 @@ public class Overseer { private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) { final String collection = message.getStr("name"); - -// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy -// newCollections.remove(collection); + checkCollection(message, collection); // ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); return clusterState.copyWith(singletonMap(collection, (DocCollection)null)); @@ -864,6 +889,7 @@ public class Overseer { */ private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate"); @@ -889,6 +915,7 @@ public class Overseer { String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + checkCollection(message, collection); // final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy // DocCollection coll = newCollections.get(collection); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 27dd8c196e9..ac36d4c5e2f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1064,6 +1064,12 @@ public final class ZkController { final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); final String collection = cd.getCloudDescriptor().getCollectionName(); assert collection != null; + + if (collection == null || collection.trim().length() == 0) { + log.error("No collection was specified."); + return; + } + ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName)); if 
(context != null) { diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java index f63b1f9df99..a67a8be527a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java @@ -64,18 +64,18 @@ public class OverseerTest extends SolrTestCaseJ4 { private List overseers = new ArrayList(); private List readers = new ArrayList(); + private String collection = "collection1"; + public static class MockZKController{ private final SolrZkClient zkClient; private final ZkStateReader zkStateReader; private final String nodeName; - private final String collection; private final LeaderElector elector; private final Map electionContext = Collections.synchronizedMap(new HashMap()); - public MockZKController(String zkAddress, String nodeName, String collection) throws InterruptedException, TimeoutException, IOException, KeeperException { + public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException { this.nodeName = nodeName; - this.collection = collection; zkClient = new SolrZkClient(zkAddress, TIMEOUT); zkStateReader = new ZkStateReader(zkClient); zkStateReader.createClusterStateWatchersAndUpdate(); @@ -105,7 +105,7 @@ public class OverseerTest extends SolrTestCaseJ4 { zkClient.close(); } - public String publishState(String coreName, String coreNodeName, String stateName, int numShards) + public String publishState(String collection, String coreName, String coreNodeName, String stateName, int numShards) throws KeeperException, InterruptedException, IOException { if (stateName == null) { ElectionContext ec = electionContext.remove(coreName); @@ -134,41 +134,40 @@ public class OverseerTest extends SolrTestCaseJ4 { q.offer(ZkStateReader.toJSON(m)); } - for (int i = 0; i < 120; i++) { - String shardId = getShardId("http://" + nodeName + "/solr/", coreName); - if (shardId != null) { - try { - zkClient.makePath("/collections/" + collection + "/leader_elect/" - + shardId + "/election", true); - } catch (NodeExistsException nee) {} - ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, - "http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP, - nodeName, ZkStateReader.CORE_NAME_PROP, coreName, - ZkStateReader.SHARD_ID_PROP, shardId, - ZkStateReader.COLLECTION_PROP, collection, - ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); - ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase( - elector, shardId, collection, nodeName + "_" + coreName, props, - zkStateReader); - elector.setup(ctx); - elector.joinElection(ctx, false); - return shardId; + if (collection.length() > 0) { + for (int i = 0; i < 120; i++) { + String shardId = getShardId(collection, coreNodeName); + if (shardId != null) { + try { + zkClient.makePath("/collections/" + collection + "/leader_elect/" + + shardId + "/election", true); + } catch (NodeExistsException nee) {} + ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, + "http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP, + nodeName, ZkStateReader.CORE_NAME_PROP, coreName, + ZkStateReader.SHARD_ID_PROP, shardId, + ZkStateReader.COLLECTION_PROP, collection, + ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); + ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase( + elector, shardId, collection, nodeName + "_" + coreName, props, + zkStateReader); + elector.setup(ctx); + elector.joinElection(ctx, 
false); + return shardId; + } + Thread.sleep(500); } - Thread.sleep(500); } return null; } - private String getShardId(final String baseUrl, final String coreName) { - Map slices = zkStateReader.getClusterState().getSlicesMap( - collection); + private String getShardId(String collection, String coreNodeName) { + Map slices = zkStateReader.getClusterState().getSlicesMap(collection); if (slices != null) { for (Slice slice : slices.values()) { for (Replica replica : slice.getReplicas()) { - // TODO: for really large clusters, we could 'index' on this - String rbaseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP); - String rcore = replica.getStr(ZkStateReader.CORE_NAME_PROP); - if (baseUrl.equals(rbaseUrl) && coreName.equals(rcore)) { + String cnn = replica.getName(); + if (coreNodeName.equals(cnn)) { return slice.getName(); } } @@ -226,17 +225,17 @@ public class OverseerTest extends SolrTestCaseJ4 { ZkStateReader reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - zkController = new MockZKController(server.getZkAddress(), "127.0.0.1", "collection1"); + zkController = new MockZKController(server.getZkAddress(), "127.0.0.1"); final int numShards=6; for (int i = 0; i < numShards; i++) { - assertNotNull("shard got no id?", zkController.publishState("core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); + assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); } - - assertEquals(2, reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap().size()); - assertEquals(2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size()); - assertEquals(2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size()); + Map rmap = reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap(); + assertEquals(rmap.toString(), 2, rmap.size()); + assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size()); + assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size()); //make sure leaders are in cloud state assertNotNull(reader.getLeaderUrl("collection1", "shard1", 15000)); @@ -258,6 +257,81 @@ public class OverseerTest extends SolrTestCaseJ4 { } } + @Test + public void testBadQueueItem() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + + ZkTestServer server = new ZkTestServer(zkDir); + + MockZKController zkController = null; + SolrZkClient zkClient = null; + SolrZkClient overseerClient = null; + + try { + server.run(); + AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost()); + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); + zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true); + + overseerClient = electNewOverseer(server.getZkAddress()); + + ZkStateReader reader = new ZkStateReader(zkClient); + reader.createClusterStateWatchersAndUpdate(); + + zkController = new MockZKController(server.getZkAddress(), "127.0.0.1"); + + final int numShards=3; + + for (int i = 0; i < numShards; i++) { + assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); + } + + assertEquals(1, reader.getClusterState().getSlice(collection, "shard1").getReplicasMap().size()); + assertEquals(1, 
reader.getClusterState().getSlice(collection, "shard2").getReplicasMap().size()); + assertEquals(1, reader.getClusterState().getSlice(collection, "shard3").getReplicasMap().size()); + + //make sure leaders are in cloud state + assertNotNull(reader.getLeaderUrl(collection, "shard1", 15000)); + assertNotNull(reader.getLeaderUrl(collection, "shard2", 15000)); + assertNotNull(reader.getLeaderUrl(collection, "shard3", 15000)); + + // publish a bad queue item + String emptyCollectionName = ""; + zkController.publishState(emptyCollectionName, "core0", "node0", ZkStateReader.ACTIVE, 1); + zkController.publishState(emptyCollectionName, "core0", "node0", null, 1); + + // make sure the Overseer is still processing items + for (int i = 0; i < numShards; i++) { + assertNotNull("shard got no id?", zkController.publishState("collection2", "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3)); + } + + assertEquals(1, reader.getClusterState().getSlice("collection2", "shard1").getReplicasMap().size()); + assertEquals(1, reader.getClusterState().getSlice("collection2", "shard2").getReplicasMap().size()); + assertEquals(1, reader.getClusterState().getSlice("collection2", "shard3").getReplicasMap().size()); + + //make sure leaders are in cloud state + assertNotNull(reader.getLeaderUrl("collection2", "shard1", 15000)); + assertNotNull(reader.getLeaderUrl("collection2", "shard2", 15000)); + assertNotNull(reader.getLeaderUrl("collection2", "shard3", 15000)); + + } finally { + if (DEBUG) { + if (zkController != null) { + zkClient.printLayoutToStdOut(); + } + } + close(zkClient); + if (zkController != null) { + zkController.close(); + } + close(overseerClient); + server.shutdown(); + } + } + @Test public void testShardAssignmentBigger() throws Exception { String zkDir = dataDir.getAbsolutePath() + File.separator @@ -289,7 +363,7 @@ public class OverseerTest extends SolrTestCaseJ4 { reader.createClusterStateWatchersAndUpdate(); for (int i = 0; i < nodeCount; i++) { - controllers[i] = new MockZKController(server.getZkAddress(), "node" + i, "collection1"); + controllers[i] = new MockZKController(server.getZkAddress(), "node" + i); } for (int i = 0; i < nodeCount; i++) { nodeExecutors[i] = Executors.newFixedThreadPool(1, new DefaultSolrThreadFactory("testShardAssignment")); @@ -306,7 +380,7 @@ public class OverseerTest extends SolrTestCaseJ4 { final String coreName = "core" + slot; try { - ids[slot]=controllers[slot % nodeCount].publishState(coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount); + ids[slot]=controllers[slot % nodeCount].publishState(collection, coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount); } catch (Throwable e) { e.printStackTrace(); fail("register threw exception:" + e.getClass()); @@ -551,21 +625,20 @@ public class OverseerTest extends SolrTestCaseJ4 { reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - mockController = new MockZKController(server.getZkAddress(), "node1", - "collection1"); + mockController = new MockZKController(server.getZkAddress(), "node1"); overseerClient = electNewOverseer(server.getZkAddress()); Thread.sleep(1000); - mockController.publishState("core1", "core_node1", + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); - waitForCollections(reader, "collection1"); + waitForCollections(reader, collection); verifyStatus(reader, ZkStateReader.RECOVERING); int version = getClusterStateVersion(zkClient); - mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE, + 
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.ACTIVE, 1); while (version == getClusterStateVersion(zkClient)); @@ -575,7 +648,7 @@ public class OverseerTest extends SolrTestCaseJ4 { overseerClient.close(); Thread.sleep(1000); // wait for overseer to get killed - mockController.publishState("core1", "core_node1", + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); version = getClusterStateVersion(zkClient); @@ -588,13 +661,13 @@ public class OverseerTest extends SolrTestCaseJ4 { assertEquals("Live nodes count does not match", 1, reader .getClusterState().getLiveNodes().size()); assertEquals("Shard count does not match", 1, reader.getClusterState() - .getSlice("collection1", "shard1").getReplicasMap().size()); + .getSlice(collection, "shard1").getReplicasMap().size()); version = getClusterStateVersion(zkClient); - mockController.publishState("core1", "core_node1", null, 1); + mockController.publishState(collection, "core1", "core_node1", null, 1); while (version == getClusterStateVersion(zkClient)); Thread.sleep(500); assertFalse("collection1 should be gone after publishing the null state", - reader.getClusterState().getCollections().contains("collection1")); + reader.getClusterState().getCollections().contains(collection)); } finally { close(mockController); close(overseerClient); @@ -676,17 +749,17 @@ public class OverseerTest extends SolrTestCaseJ4 { for (int i = 0; i < atLeast(4); i++) { killCounter.incrementAndGet(); //for each round allow 1 kill - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); - mockController.publishState("core1", "node1", "state1",1); + mockController = new MockZKController(server.getZkAddress(), "node1"); + mockController.publishState(collection, "core1", "node1", "state1",1); if(mockController2!=null) { mockController2.close(); mockController2 = null; } - mockController.publishState("core1", "node1","state2",1); - mockController2 = new MockZKController(server.getZkAddress(), "node2", "collection1"); - mockController.publishState("core1", "node1", "state1",1); + mockController.publishState(collection, "core1", "node1","state2",1); + mockController2 = new MockZKController(server.getZkAddress(), "node2"); + mockController.publishState(collection, "core1", "node1", "state1",1); verifyShardLeader(reader, "collection1", "shard1", "core1"); - mockController2.publishState("core4", "node2", "state2" ,1); + mockController2.publishState(collection, "core4", "node2", "state2" ,1); mockController.close(); mockController = null; verifyShardLeader(reader, "collection1", "shard1", "core4"); @@ -729,11 +802,11 @@ public class OverseerTest extends SolrTestCaseJ4 { reader = new ZkStateReader(controllerClient); reader.createClusterStateWatchersAndUpdate(); - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); + mockController = new MockZKController(server.getZkAddress(), "node1"); overseerClient = electNewOverseer(server.getZkAddress()); - mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1); + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); waitForCollections(reader, "collection1"); @@ -743,8 +816,8 @@ public class OverseerTest extends SolrTestCaseJ4 { int version = getClusterStateVersion(controllerClient); - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); - mockController.publishState("core1", "core_node1", 
ZkStateReader.RECOVERING, 1); + mockController = new MockZKController(server.getZkAddress(), "node1"); + mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1); while (version == getClusterStateVersion(controllerClient)); @@ -794,11 +867,11 @@ public class OverseerTest extends SolrTestCaseJ4 { reader = new ZkStateReader(controllerClient); reader.createClusterStateWatchersAndUpdate(); - mockController = new MockZKController(server.getZkAddress(), "node1", "collection1"); + mockController = new MockZKController(server.getZkAddress(), "node1"); overseerClient = electNewOverseer(server.getZkAddress()); - mockController.publishState("core1", "node1", ZkStateReader.RECOVERING, 12); + mockController.publishState(collection, "core1", "node1", ZkStateReader.RECOVERING, 12); waitForCollections(reader, "collection1"); From 3e2a81753955e8e40460b0ac3579dc0b6140d8af Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 17:20:36 +0000 Subject: [PATCH 06/38] Fix javadoc spelling. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574577 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/solr/common/cloud/ZkStateReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 4b608943fb2..12dc700c896 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -626,7 +626,7 @@ public class ZkStateReader { } /** - * Returns the baseURL corrisponding to a given node's nodeName -- + * Returns the baseURL corresponding to a given node's nodeName -- * NOTE: does not (currently) imply that the nodeName (or resulting * baseURL) exists in the cluster. * @lucene.experimental From e88091b3ddf90800cbfc2f71587774dfea57881d Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 17:28:31 +0000 Subject: [PATCH 07/38] SOLR-5811: Additional cleanup. 
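[Editor's note: a minimal sketch of the guard pattern this cleanup settles on,
illustrative only -- the real handlers are in the diff below. Each state-mutating
handler validates the message before touching cluster state:

    if (!checkCollectionKeyExistence(message)) return clusterState; // drop bad item, state untouched

Combined with the try/catch added around processMessage() in PATCH 05, a
malformed work item is now dequeued and dropped instead of being retried
forever.]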
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574580 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/solr/cloud/Overseer.java | 69 +++++++++---------- .../org/apache/solr/cloud/ZkController.java | 16 ++--- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 8bf202ce8ec..1b4f38c2c8e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -311,7 +311,7 @@ public class Overseer { private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) { log.info("createReplica() {} ", message); String coll = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, coll); + if (!checkCollectionKeyExistence(message)) return clusterState; String slice = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice sl = clusterState.getSlice(coll, slice); if(sl == null){ @@ -352,7 +352,7 @@ public class Overseer { private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; log.info("Update shard state invoked for collection: " + collection + " with message: " + message); for (String key : message.keySet()) { if (ZkStateReader.COLLECTION_PROP.equals(key)) continue; @@ -377,7 +377,7 @@ public class Overseer { private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKey = message.getStr("routeKey"); String range = message.getStr("range"); @@ -417,15 +417,22 @@ public class Overseer { return clusterState; } - private void checkCollection(ZkNodeProps message, String collection) { - if (collection == null || collection.trim().length() == 0) { - log.error("Skipping invalid Overseer message because it has no collection specified: " + message); + private boolean checkCollectionKeyExistence(ZkNodeProps message) { + return checkKeyExistence(message, ZkStateReader.COLLECTION_PROP); + } + + private boolean checkKeyExistence(ZkNodeProps message, String key) { + String value = message.getStr(key); + if (value == null || value.trim().length() == 0) { + log.error("Skipping invalid Overseer message because it has no " + key + " specified: " + message); + return false; } + return true; } private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKeyStr = message.getStr("routeKey"); @@ -451,7 +458,7 @@ public class Overseer { private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP); Slice slice = clusterState.getSlice(collection, shardId); if (slice == null) { @@ -498,7 +505,7 
@@ public class Overseer { private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) { String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); if(collection==null || sliceName == null){ @@ -517,30 +524,30 @@ public class Overseer { /** * Try to assign core to the cluster. */ - private ClusterState updateState(ClusterState state, final ZkNodeProps message) { + private ClusterState updateState(ClusterState clusterState, final ZkNodeProps message) { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null); log.info("Update state numShards={} message={}", numShards, message); List shardNames = new ArrayList(); //collection does not yet exist, create placeholders if num shards is specified - boolean collectionExists = state.hasCollection(collection); + boolean collectionExists = clusterState.hasCollection(collection); if (!collectionExists && numShards!=null) { getShardNames(numShards, shardNames); - state = createCollection(state, collection, shardNames, message); + clusterState = createCollection(clusterState, collection, shardNames, message); } String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); if (coreNodeName == null) { - coreNodeName = getAssignedCoreNodeName(state, message); + coreNodeName = getAssignedCoreNodeName(clusterState, message); if (coreNodeName != null) { log.info("node=" + coreNodeName + " is already registered"); } else { // if coreNodeName is null, auto assign one - coreNodeName = Assign.assignNode(collection, state); + coreNodeName = Assign.assignNode(collection, clusterState); } message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); @@ -549,7 +556,7 @@ public class Overseer { // use the provided non null shardId if (sliceName == null) { //get shardId from ClusterState - sliceName = getAssignedId(state, coreNodeName, message); + sliceName = getAssignedId(clusterState, coreNodeName, message); if (sliceName != null) { log.info("shard=" + sliceName + " is already registered"); } @@ -558,14 +565,14 @@ public class Overseer { //request new shardId if (collectionExists) { // use existing numShards - numShards = state.getCollection(collection).getSlices().size(); + numShards = clusterState.getCollection(collection).getSlices().size(); log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards); } - sliceName = Assign.assignShard(collection, state, numShards); + sliceName = Assign.assignShard(collection, clusterState, numShards); log.info("Assigning new node to shard shard=" + sliceName); } - Slice slice = state.getSlice(collection, sliceName); + Slice slice = clusterState.getSlice(collection, sliceName); Map replicaProps = new LinkedHashMap(); @@ -611,9 +618,9 @@ public class Overseer { Map replicas; if (slice != null) { - state = checkAndCompleteShardSplit(state, collection, coreNodeName, sliceName, replicaProps); + clusterState = checkAndCompleteShardSplit(clusterState, collection, coreNodeName, sliceName, replicaProps); // get the current slice again because it may have been updated due to checkAndCompleteShardSplit method - slice = 
state.getSlice(collection, sliceName); + slice = clusterState.getSlice(collection, sliceName); sliceProps = slice.getProperties(); replicas = slice.getReplicasCopy(); } else { @@ -627,7 +634,7 @@ public class Overseer { replicas.put(replica.getName(), replica); slice = new Slice(sliceName, replicas, sliceProps); - ClusterState newClusterState = updateSlice(state, collection, slice); + ClusterState newClusterState = updateSlice(clusterState, collection, slice); return newClusterState; } @@ -876,11 +883,9 @@ public class Overseer { * Remove collection from cloudstate */ private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) { - final String collection = message.getStr("name"); - checkCollection(message, collection); + if (!checkKeyExistence(message, "name")) return clusterState; -// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); return clusterState.copyWith(singletonMap(collection, (DocCollection)null)); } @@ -888,34 +893,28 @@ public class Overseer { * Remove collection slice from cloudstate */ private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) { - final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); + final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + if (!checkCollectionKeyExistence(message)) return clusterState; log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate"); -// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy DocCollection coll = clusterState.getCollection(collection); Map newSlices = new LinkedHashMap(coll.getSlicesMap()); newSlices.remove(sliceId); DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter()); -// newCollections.put(newCollection.getName(), newCollection); return newState(clusterState, singletonMap(collection,newCollection)); - -// return new ClusterState(clusterState.getLiveNodes(), newCollections); } /* * Remove core from cloudstate */ private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) { - - String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); - + final String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - checkCollection(message, collection); + if (!checkCollectionKeyExistence(message)) return clusterState; // final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy // DocCollection coll = newCollections.get(collection); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index ac36d4c5e2f..cee27041728 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1013,7 +1013,8 @@ public final class ZkController { core.close(); } } - log.info("publishing core={} state={}", cd.getName(), state); + String collection = cd.getCloudDescriptor().getCollectionName(); + log.info("publishing core={} state={} collection={}", cd.getName(), state, collection); //System.out.println(Thread.currentThread().getStackTrace()[3]); Integer numShards = cd.getCloudDescriptor().getNumShards(); if (numShards == null) { //XXX sys prop hack @@ -1021,8 +1022,7 
@@ public final class ZkController { numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP); } - assert cd.getCloudDescriptor().getCollectionName() != null && cd.getCloudDescriptor() - .getCollectionName().length() > 0; + assert collection != null && collection.length() > 0; String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); //assert cd.getCloudDescriptor().getShardId() != null; @@ -1033,12 +1033,9 @@ public final class ZkController { ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(), ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(), - ZkStateReader.COLLECTION_PROP, cd.getCloudDescriptor() - .getCollectionName(), - ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() - : null, - ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName - : null); + ZkStateReader.COLLECTION_PROP, collection, + ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() : null, + ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName : null); if (updateLastState) { cd.getCloudDescriptor().lastPublished = state; } @@ -1368,7 +1365,6 @@ public final class ZkController { CloudDescriptor cloudDesc = cd.getCloudDescriptor(); - // make sure the node name is set on the descriptor if (cloudDesc.getCoreNodeName() == null) { cloudDesc.setCoreNodeName(coreNodeName); From af101b305706ffbb1272281490e1d18fcd86c2db Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 5 Mar 2014 17:48:27 +0000 Subject: [PATCH 08/38] SOLR-5813: tests for "" or null collection name - should default to core name. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574589 13f79535-47bb-0310-9956-ffa450edef68 --- .../CollectionsAPIDistributedZkTest.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java index 310c97d7ffa..e257b5e66ca 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java @@ -203,14 +203,14 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa testCollectionsAPI(); testCollectionsAPIAddRemoveStress(); testErrorHandling(); + testNoCollectionSpecified(); deletePartiallyCreatedCollection(); deleteCollectionRemovesStaleZkCollectionsNode(); clusterPropTest(); - addReplicaTest(); - // last deleteCollectionWithDownNodes(); + if (DEBUG) { super.printLayout(); } @@ -578,6 +578,40 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa String val2 = failure.getVal(0).toString(); assertTrue(val1.contains("SolrException") || val2.contains("SolrException")); } + + private void testNoCollectionSpecified() throws Exception { + + cloudClient.getZkStateReader().updateClusterState(true); + assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection")); + assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2")); + + // try and create a SolrCore with no collection name + Create createCmd = new Create(); + createCmd.setCoreName("corewithnocollection"); + createCmd.setCollection(""); + String dataDir = SolrTestCaseJ4.dataDir.getAbsolutePath() + File.separator + + System.currentTimeMillis() + "corewithnocollection" + "_1v"; + 
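// Editor's annotation: this test drives core creation twice, once with
// setCollection("") and once with setCollection(null); per SOLR-5813 both
// cases are expected to default the collection to the core name, which the
// assertions at the end of the method check against the cluster state.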
createCmd.setDataDir(dataDir); + createCmd.setNumShards(1); + if (secondConfigSet) { + createCmd.setCollectionConfigName("conf1"); + } + + createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1))) + .request(createCmd); + + // try and create a SolrCore with no collection name + createCmd.setCollection(null); + createCmd.setCoreName("corewithnocollection2"); + + createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1))) + .request(createCmd); + + // in both cases, the collection should have default to the core name + cloudClient.getZkStateReader().updateClusterState(true); + assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection")); + assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2")); + } private void testNodesUsedByCreate() throws Exception { // we can use this client because we just want base url From f54178970e5ab2d6a77ace8e45f735b2168e2c87 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Wed, 5 Mar 2014 18:05:25 +0000 Subject: [PATCH 09/38] LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574595 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../lucene/index/DocFieldProcessor.java | 5 - .../lucene/index/DocInverterPerField.java | 12 +- .../org/apache/lucene/index/IndexWriter.java | 7 +- .../lucene/index/TermsHashPerField.java | 11 +- .../lucene/index/TestExceedMaxTermLength.java | 105 ++++++++++++ .../apache/lucene/index/TestIndexWriter.java | 26 +-- solr/CHANGES.txt | 16 +- .../solr/collection1/conf/schema11.xml | 13 ++ .../solr/update/TestExceedMaxTermLength.java | 153 ++++++++++++++++++ 10 files changed, 326 insertions(+), 29 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java create mode 100644 solr/core/src/test/org/apache/solr/update/TestExceedMaxTermLength.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index cca653dc97a..5f4511a6d8c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -68,6 +68,13 @@ Optimizations ======================= Lucene 4.8.0 ======================= +Changes in Runtime Behavior + +* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException + if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous + behavior of silently ignoring these terms, use LengthFilter in your Analyzer. + (hossman, Mike McCandless, Varun Thacker) + New Features * LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting diff --git a/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java index c35b914fc47..23c60a09c25 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -209,11 +209,6 @@ final class DocFieldProcessor extends DocConsumer { final DocFieldProcessorPerField perField = fields[i]; perField.consumer.processFields(perField.fields, perField.fieldCount); } - - if (docState.maxTermPrefix != null && docState.infoStream.isEnabled("IW")) { - docState.infoStream.message("IW", "WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. 
Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); - docState.maxTermPrefix = null; - } } private DocFieldProcessorPerField processField(FieldInfos.Builder fieldInfos, diff --git a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java index df21f211b24..39167f4c79c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.util.IOUtils; /** * Holds state for inverting all occurrences of a single @@ -182,6 +181,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField { // when we come back around to the field... fieldState.position += posIncrAttribute.getPositionIncrement(); fieldState.offset += offsetAttribute.endOffset(); + + + if (docState.maxTermPrefix != null) { + final String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"; + if (docState.infoStream.isEnabled("IW")) { + docState.infoStream.message("IW", "ERROR: " + msg); + } + docState.maxTermPrefix = null; + throw new IllegalArgumentException(msg); + } + /* if success was false above there is an exception coming through and we won't get here.*/ succeededInProcessingField = true; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 0a792b8d1dd..ce54da7db6d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -207,8 +207,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ /** * Absolute hard maximum length for a term, in bytes once * encoded as UTF8. If a term arrives from the analyzer - * longer than this length, it is skipped and a message is - * printed to infoStream, if set (see {@link + * longer than this length, an + * IllegalArgumentException is thrown + * and a message is printed to infoStream, if set (see {@link * IndexWriterConfig#setInfoStream(InfoStream)}). */ public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; @@ -1159,7 +1160,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ * merge policy. * *
   * <p>Note that each term in the document can be no longer
-  * than 16383 characters, otherwise an
+  * than {@link #MAX_TERM_LENGTH} in bytes, otherwise an
   * IllegalArgumentException will be thrown.</p>
   *
   * <p>
Note that it's possible to create an invalid Unicode diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java index bb67d642c3b..aa4fcba2647 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -179,12 +179,11 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { try { termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef()); } catch (MaxBytesLengthExceededException e) { - // Not enough room in current block - // Just skip this term, to remain as robust as - // possible during indexing. A TokenFilter - // can be inserted into the analyzer chain if - // other behavior is wanted (pruning the term - // to a prefix, throwing an exception, etc). + // Term is too large; record this here (can't throw an + // exc because DocInverterPerField will then abort the + // entire segment) and then throw an exc later in + // DocInverterPerField.java. LengthFilter can always be + // used to prune the term before indexing: if (docState.maxTermPrefix == null) { final int saved = termBytesRef.length; try { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java b/lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java new file mode 100644 index 00000000000..ec8ea99d753 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestExceedMaxTermLength.java @@ -0,0 +1,105 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import java.io.IOException; + +import org.junit.Before; +import org.junit.After; + +/** + * Tests that a useful exception is thrown when attempting to index a term that is + * too large + * + * @see IndexWriter#MAX_TERM_LENGTH + */ +public class TestExceedMaxTermLength extends LuceneTestCase { + + private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1; + private final static int maxTestTermLegnth = IndexWriter.MAX_TERM_LENGTH * 2; + + Directory dir = null; + + @Before + public void createDir() { + dir = newDirectory(); + } + @After + public void destroyDir() throws IOException { + dir.close(); + dir = null; + } + + public void test() throws Exception { + + IndexWriter w = new IndexWriter + (dir, newIndexWriterConfig(random(), + TEST_VERSION_CURRENT, + new MockAnalyzer(random()))); + try { + final FieldType ft = new FieldType(); + ft.setIndexed(true); + ft.setStored(random().nextBoolean()); + ft.freeze(); + + final Document doc = new Document(); + if (random().nextBoolean()) { + // totally ok short field value + doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10), + TestUtil.randomSimpleString(random(), 1, 10), + ft)); + } + // problematic field + final String name = TestUtil.randomSimpleString(random(), 1, 50); + final String value = TestUtil.randomSimpleString(random(), + minTestTermLength, + maxTestTermLegnth); + final Field f = new Field(name, value, ft); + if (random().nextBoolean()) { + // totally ok short field value + doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10), + TestUtil.randomSimpleString(random(), 1, 10), + ft)); + } + doc.add(f); + + try { + w.addDocument(doc); + fail("Did not get an exception from adding a monster term"); + } catch (IllegalArgumentException e) { + final String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH); + final String msg = e.getMessage(); + assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg, + msg.contains("immense term")); + assertTrue("IllegalArgumentException didn't mention max length ("+maxLengthMsg+"): " + msg, + msg.contains(maxLengthMsg)); + assertTrue("IllegalArgumentException didn't mention field name ("+name+"): " + msg, + msg.contains(name)); + } + } finally { + w.close(); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index cf8a1ba5b16..3284dde2782 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1660,32 +1660,32 @@ public class TestIndexWriter extends LuceneTestCase { // This contents produces a too-long term: String contents = "abc xyz x" + bigTerm + " another term"; doc.add(new TextField("content", contents, Field.Store.NO)); - w.addDocument(doc); + try { + w.addDocument(doc); + fail("should have hit exception"); + } catch (IllegalArgumentException iae) { + // expected + } // Make sure we can add another normal document doc = new Document(); doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO)); w.addDocument(doc); + // So we remove the deleted doc: + w.forceMerge(1); + IndexReader reader = w.getReader(); 
w.close(); // Make sure all terms < max size were indexed - assertEquals(2, reader.docFreq(new Term("content", "abc"))); + assertEquals(1, reader.docFreq(new Term("content", "abc"))); assertEquals(1, reader.docFreq(new Term("content", "bbb"))); - assertEquals(1, reader.docFreq(new Term("content", "term"))); - assertEquals(1, reader.docFreq(new Term("content", "another"))); + assertEquals(0, reader.docFreq(new Term("content", "term"))); - // Make sure position is still incremented when - // massive term is skipped: - DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another")); - assertEquals(0, tps.nextDoc()); - assertEquals(1, tps.freq()); - assertEquals(3, tps.nextPosition()); - - // Make sure the doc that has the massive term is in + // Make sure the doc that has the massive term is NOT in // the index: - assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); + assertEquals("document with wicked long term is in the index!", 1, reader.numDocs()); reader.close(); dir.close(); diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b9b572cc005..63d84a170c4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -76,7 +76,16 @@ Velocity 1.7 and Velocity Tools 2.0 Apache UIMA 2.3.1 Apache ZooKeeper 3.4.5 - +Upgrading from Solr 4.7 +---------------------- + +* In previous versions of Solr, Terms that exceeded Lucene's MAX_TERM_LENGTH were + silently ignored when indexing documents. Begining with Solr 4.8, a document + an error will be generated when attempting to index a document with a term + that is too large. If you wish to continue to have large terms ignored, + use "solr.LengthFilterFactory" in all of your Analyzers. See LUCENE-5472 for + more details. + Detailed Change List ---------------------- @@ -154,6 +163,11 @@ Other Changes registration exists, wait a short time to see if it goes away. (Mark Miller) +* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException + if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous + behavior of silently ignoring these terms, use LengthFilter in your Analyzer. + (hossman, Mike McCandless, Varun Thacker) + ================== 4.7.0 ================== Versions of Major Components diff --git a/solr/core/src/test-files/solr/collection1/conf/schema11.xml b/solr/core/src/test-files/solr/collection1/conf/schema11.xml index a993cbd6f61..ea4edd553aa 100755 --- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema11.xml @@ -287,6 +287,16 @@ valued. --> class="solr.ExternalFileField"/> + + + + + + + + + + @@ -324,6 +334,9 @@ valued. --> + + + -
-Provides index sorting capablities. The application can use one of the
-pre-existing Sorter implementations, e.g. to sort by a
-{@link org.apache.lucene.index.sorter.NumericDocValuesSorter}
-or {@link org.apache.lucene.index.sorter.Sorter#REVERSE_DOCS reverse} the order
-of the documents. Additionally, the application can implement a custom
-{@link org.apache.lucene.index.sorter.Sorter} which returns a permutation on
-a source {@link org.apache.lucene.index.AtomicReader}'s document IDs, to sort
-the input documents by additional criteria.
+<p>
+Provides index sorting capabilities. The application can use any
+Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
+reverse the order of the documents (by using SortField.Type.DOC in reverse).
+Multi-level sorts can be specified the same way you would when searching, by
+building Sort from multiple SortFields.
 
 <p>
 {@link org.apache.lucene.index.sorter.SortingMergePolicy} can be used to make
 Lucene sort segments before merging them. This will ensure that every segment
 resulting from a merge will be sorted according to the provided
-{@link org.apache.lucene.index.sorter.Sorter}. This however makes merging and
+{@link org.apache.lucene.search.Sort}. This however makes merging and
 thus indexing slower.
 
 <p>
Sorted segments allow for early query termination when the sort order From 58198c299cd88fecacf3bfdbfbfb7f4aef7694b7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 6 Mar 2014 17:03:56 +0000 Subject: [PATCH 27/38] LUCENE-5493: fix precommit git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574962 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/sorter/BlockJoinComparatorSource.java | 15 +++++++++++---- .../lucene/index/sorter/SortingAtomicReader.java | 10 +++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java index c2a2a476b28..3029bcab656 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java @@ -31,7 +31,16 @@ import org.apache.lucene.util.FixedBitSet; /** * Helper class to sort readers that contain blocks of documents. + *
+ * Note that this currently has some limitations:
+ * <ul>
+ *   <li>Cannot yet be used with IndexSearcher.searchAfter
+ *   <li>Filling sort value fields is not yet supported.
+ * </ul>
+ * Its intended to be used with {@link SortingMergePolicy}. */ +// TODO: can/should we clean this thing up (e.g. return a proper sort value) +// and move to the join/ module? public class BlockJoinComparatorSource extends FieldComparatorSource { final Filter parentsFilter; final Sort parentSort; @@ -84,8 +93,8 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { childComparators[i] = childFields[i].getComparator(1, i); } - // NOTE: not quite right i guess, really our sort "value" is more complex... - // but at the moment you really should only use this at indexing time. + // NOTE: we could return parent ID as value but really our sort "value" is more complex... + // So we throw UOE for now. At the moment you really should only use this at indexing time. return new FieldComparator() { int bottomParent; int bottomChild; @@ -171,7 +180,6 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { int compare(int docID1, int parent1, int docID2, int parent2) throws IOException { if (parent1 == parent2) { // both are in the same block - // nocommit: should not be needed? if (docID1 == parent1 || docID2 == parent2) { // keep parents at the end of blocks return docID1 - docID2; @@ -180,7 +188,6 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { } } else { int cmp = compare(parent1, parent2, parentComparators, parentReverseMul); - // nocommit: should not be needed? if (cmp == 0) { return parent1 - parent2; } else { diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java index 55693434459..1ecde39d9e2 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java @@ -49,13 +49,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; /** * An {@link AtomicReader} which supports sorting documents by a given - * {@link Sorter}. You can use this class to sort an index as follows: + * {@link Sort}. You can use this class to sort an index as follows: * *
  * IndexWriter writer; // writer to which the sorted index will be added
  * DirectoryReader reader; // reader on the input index
- * Sorter sorter; // determines how the documents are sorted
- * AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
+ * Sort sort; // determines how the documents are sorted
+ * AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
  * writer.addIndexes(sortingReader);
  * writer.close();
  * reader.close();
@@ -481,7 +481,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
   static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
     
     /**
-     * A {@link Sorter} which sorts two parallel arrays of doc IDs and
+     * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
      * offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
      * is swapped too.
      */
@@ -709,7 +709,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
   }
 
   /** Return a sorted view of reader according to the order
-   *  defined by sorter. If the reader is already sorted, this
+   *  defined by sort. If the reader is already sorted, this
    *  method might return the reader as-is. */
   public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
     return wrap(reader, new Sorter(sort).sort(reader));

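[Editor's note: a self-contained sketch of the wrap-and-addIndexes usage from the
javadoc above, under the new Sort-based API. The paths, field name, analyzer choice,
and Version constant are illustrative assumptions, not part of this patch.]

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.SlowCompositeReaderWrapper;
    import org.apache.lucene.index.sorter.SortingAtomicReader;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class SortIndexExample {
      public static void main(String[] args) throws IOException {
        // Sort by a long field "weight", descending.
        Sort sort = new Sort(new SortField("weight", SortField.Type.LONG, true));
        Directory srcDir = FSDirectory.open(new File("/path/to/unsorted-index"));
        Directory dstDir = FSDirectory.open(new File("/path/to/sorted-index"));
        DirectoryReader reader = DirectoryReader.open(srcDir);
        AtomicReader sortingReader =
            SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
        IndexWriter writer = new IndexWriter(dstDir, new IndexWriterConfig(
            Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48)));
        // Add the sorted view; adding the raw reader would leave the copy unsorted.
        writer.addIndexes(sortingReader);
        writer.close();
        reader.close();
        srcDir.close();
        dstDir.close();
      }
    }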
From 57569ed1aaa91bdf693bf4fea8e9ff7ae96d1b0e Mon Sep 17 00:00:00 2001
From: Michael McCandless 
Date: Thu, 6 Mar 2014 17:11:46 +0000
Subject: [PATCH 28/38] LUCENE-5493: don't do forceMerge on initial build of
 AnalyzingInfixSuggester

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574965 13f79535-47bb-0310-9956-ffa450edef68
---
 .../analyzing/AnalyzingInfixSuggester.java    |  75 +++-------
 .../analyzing/BlendedInfixSuggester.java      |  10 +-
 .../search/suggest/LookupBenchmarkTest.java   |   3 +-
 .../AnalyzingInfixSuggesterTest.java          | 131 +++---------------
 .../analyzing/BlendedInfixSuggesterTest.java  |  59 ++------
 5 files changed, 60 insertions(+), 218 deletions(-)

diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index 1f72b2b0cbf..df3aa04b859 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -46,15 +46,12 @@ import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FilterAtomicReader;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
-import org.apache.lucene.index.sorter.SortingAtomicReader;
 import org.apache.lucene.index.sorter.SortingMergePolicy;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -115,9 +112,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   /** Analyzer used at index time */
   protected final Analyzer indexAnalyzer;
   final Version matchVersion;
-  private final File indexPath;
+  private final Directory dir;
   final int minPrefixChars;
-  private Directory dir;
 
   /** Used for ongoing NRT additions/updates. */
   private IndexWriter writer;
@@ -133,13 +129,15 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
 
   /** Create a new instance, loading from a previously built
-   *  directory, if it exists. */
-  public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
-    this(matchVersion, indexPath, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
+   *  directory, if it exists.  Note that {@link #close}
+   *  will also close the provided directory. */
+  public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
+    this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
   }
 
   /** Create a new instance, loading from a previously built
-   *  directory, if it exists.
+   *  directory, if it exists. Note that {@link #close}
+   *  will also close the provided directory.
    *
    *  @param minPrefixChars Minimum number of leading characters
    *     before PrefixQuery is used (default 4).
@@ -147,7 +145,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    *     ngrams (increasing index size but making lookups
    *     faster).
    */
-  public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
+  public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
 
     if (minPrefixChars < 0) {
       throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
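[Editor's note: a minimal sketch of the new Directory-based construction; the index
path and inputs are hypothetical, and the analyzer choice is illustrative.]

    Analyzer a = new StandardAnalyzer(Version.LUCENE_48);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
        Version.LUCENE_48, FSDirectory.open(new File("/var/data/suggest-infix")), a);
    suggester.build(new InputArrayIterator(inputs));  // inputs is an Input[]
    List<Lookup.LookupResult> results =
        suggester.lookup("lucen", 5, true, true);     // allTermsRequired, doHighlight
    suggester.close();  // since this change, also closes the Directory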
@@ -156,32 +154,29 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     this.queryAnalyzer = queryAnalyzer;
     this.indexAnalyzer = indexAnalyzer;
     this.matchVersion = matchVersion;
-    this.indexPath = indexPath;
+    this.dir = dir;
     this.minPrefixChars = minPrefixChars;
-    dir = getDirectory(indexPath);
 
     if (DirectoryReader.indexExists(dir)) {
       // Already built; open it:
       writer = new IndexWriter(dir,
-                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), SORT, IndexWriterConfig.OpenMode.APPEND));
+                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
       searcherMgr = new SearcherManager(writer, true, null);
     }
   }
 
   /** Override this to customize index settings, e.g. which
-   *  codec to use. The sort is null if this config is for
-   *  the first pass writer. */
-  protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sort sort, IndexWriterConfig.OpenMode openMode) {
+   *  codec to use. */
+  protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
     IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
     iwc.setCodec(new Lucene46Codec());
     iwc.setOpenMode(openMode);
 
-    if (sort != null) {
-      // This way all merged segments will be sorted at
-      // merge time, allow for per-segment early termination
-      // when those segments are searched:
-      iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sort));
-    }
+    // This way all merged segments will be sorted at
+    // merge time, allowing for per-segment early termination
+    // when those segments are searched:
+    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
+
     return iwc;
   }
 
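[Editor's note: with the Sort parameter gone, the SortingMergePolicy is installed
unconditionally, so a subclass overriding this method should extend the returned
config rather than rebuild it from scratch, or per-segment early termination at
lookup time silently stops working. A hedged sketch; the RAM-buffer tuning and
surrounding variables are illustrative only:]

    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
        Version.LUCENE_48, dir, indexAnalyzer, queryAnalyzer, 4) {
      @Override
      protected IndexWriterConfig getIndexWriterConfig(Version matchVersion,
          Analyzer gramAnalyzer, IndexWriterConfig.OpenMode openMode) {
        IndexWriterConfig iwc = super.getIndexWriterConfig(matchVersion, gramAnalyzer, openMode);
        iwc.setRAMBufferSizeMB(64.0);  // tune freely, but leave the merge policy alone
        return iwc;
      }
    };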
@@ -204,16 +199,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
       writer = null;
     }
 
-    Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
-
-    IndexWriter w = null;
     AtomicReader r = null;
     boolean success = false;
     try {
       // First pass: build a temporary normal Lucene index,
       // just indexing the suggestions as they iterate:
-      w = new IndexWriter(dirTmp,
-                          getIndexWriterConfig(matchVersion, getGramAnalyzer(), null, IndexWriterConfig.OpenMode.CREATE));
+      writer = new IndexWriter(dir,
+                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
       BytesRef text;
       Document doc = new Document();
       FieldType ft = getTextFieldType();
@@ -251,35 +243,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
         if (iter.hasPayloads()) {
           payloadField.setBytesValue(iter.payload());
         }
-        w.addDocument(doc);
+        writer.addDocument(doc);
       }
       //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
 
-      // Second pass: sort the entire index:
-      r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));
-      //long t1 = System.nanoTime();
-
-      // We can rollback the first pass, now that have have
-      // the reader open, because we will discard it anyway
-      // (no sense in fsync'ing it):
-      w.rollback();
-
-      r = SortingAtomicReader.wrap(r, SORT);
-      
-      writer = new IndexWriter(dir,
-                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), SORT, IndexWriterConfig.OpenMode.CREATE));
-      writer.addIndexes(new IndexReader[] {r});
-      r.close();
-
-      //System.out.println("sort time: " + ((System.nanoTime()-t1)/1000000) + " msec");
-
       searcherMgr = new SearcherManager(writer, true, null);
       success = true;
     } finally {
       if (success) {
-        IOUtils.close(w, r, dirTmp);
+        IOUtils.close(r);
       } else {
-        IOUtils.closeWhileHandlingException(w, writer, r, dirTmp);
+        IOUtils.closeWhileHandlingException(writer, r);
         writer = null;
       }
     }
@@ -638,11 +612,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     }
     if (writer != null) {
       writer.close();
-      writer = null;
-    }
-    if (dir != null) {
       dir.close();
-      dir = null;
+      writer = null;
     }
   }
 
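[Editor's note: close() now owns the caller-provided Directory, so the Directory must
not be closed or reused after the suggester is closed; a later access would hit
AlreadyClosedException. A sketch of the resulting lifecycle, with a hypothetical path:]

    Directory dir = FSDirectory.open(new File("/var/data/suggest-infix"));
    AnalyzingInfixSuggester suggester =
        new AnalyzingInfixSuggester(Version.LUCENE_48, dir, analyzer);
    try {
      suggester.build(new InputArrayIterator(inputs));
      // ... lookups ...
    } finally {
      suggester.close();  // closes both the IndexWriter and dir
    }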
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
index 02281069a93..46df98c648e 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
@@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest.analyzing;
  * limitations under the License.
  */
 
-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
@@ -38,6 +37,7 @@ import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Version;
 
@@ -92,8 +92,8 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
    * Create a new instance, loading from a previously built
    * directory, if it exists.
    */
-  public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
-    super(matchVersion, indexPath, analyzer);
+  public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
+    super(matchVersion, dir, analyzer);
     this.blenderType = BlenderType.POSITION_LINEAR;
     this.numFactor = DEFAULT_NUM_FACTOR;
   }
@@ -106,9 +106,9 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
   * @param numFactor   Factor to multiply the number of searched elements before blending
    * @throws IOException If there are problems opening the underlying Lucene index.
    */
-  public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
+  public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                                int minPrefixChars, BlenderType blenderType, int numFactor) throws IOException {
-    super(matchVersion, indexPath, indexAnalyzer, queryAnalyzer, minPrefixChars);
+    super(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars);
     this.blenderType = blenderType;
     this.numFactor = numFactor;
   }
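[Editor's note: the blended variant follows the same Directory-based pattern; the
path, input, and blending parameters below are illustrative.]

    BlendedInfixSuggester suggester = new BlendedInfixSuggester(
        Version.LUCENE_48, FSDirectory.open(new File("/var/data/suggest-blended")),
        indexAnalyzer, queryAnalyzer,
        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
        BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
    suggester.build(new InputArrayIterator(new Input[] {
        new Input("star wars: episode v - the empire strikes back", 12, new BytesRef("1"))
    }));
    List<Lookup.LookupResult> results = suggester.lookup("empire", 10, true, true);
    suggester.close();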
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
index 16ee899ac09..b2471ef6c5c 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
@@ -40,6 +40,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
 import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
 import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.*;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
@@ -161,7 +162,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
     } catch (InstantiationException e) {
       Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
       if (cls == AnalyzingInfixSuggester.class) {
-        lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, TestUtil.getTempDir("LookupBenchmarkTest"), a);
+        lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, FSDirectory.open(TestUtil.getTempDir("LookupBenchmarkTest")), a);
       } else {
         Constructor ctor = cls.getConstructor(Analyzer.class);
         lookup = ctor.newInstance(a);
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
index e85713864a4..147ee3b1b61 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
@@ -21,7 +21,6 @@ import java.io.File;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
@@ -39,7 +38,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
@@ -55,15 +53,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
 
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
@@ -106,22 +97,12 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
     File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
 
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
     assertEquals(2, suggester.getCount());
     suggester.close();
 
-    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
     assertEquals(2, results.size());
     assertEquals("a penny saved is a penny earned", results.get(0).key);
@@ -159,15 +140,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
         @Override
        protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
           try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
@@ -239,17 +213,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("lend me your ear", 8, new BytesRef("foobar")),
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
-
     File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
 
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
     int minPrefixLength = random().nextInt(10);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
     suggester.build(new InputArrayIterator(keys));
 
     for(int i=0;i<2;i++) {
@@ -306,12 +274,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
 
       // Make sure things still work after close and reopen:
       suggester.close();
-      suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
-          @Override
-          protected Directory getDirectory(File path) {
-            return newFSDirectory(path);
-          }
-        };
+      suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
     }
     suggester.close();
   }
@@ -321,15 +284,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
     assertEquals(1, results.size());
@@ -342,15 +298,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
     assertEquals(1, results.size());
@@ -359,18 +308,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
 
     // Try again, but overriding addPrefixMatch to highlight
     // the entire hit:
-    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
+    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
         @Override
         protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
           sb.append("");
           sb.append(surface);
           sb.append("");
         }
-
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
       };
     suggester.build(new InputArrayIterator(keys));
     results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
@@ -384,15 +328,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
     suggester.close();
     suggester.close();
@@ -418,14 +355,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
         }
       };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, indexAnalyzer, queryAnalyzer, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), indexAnalyzer, queryAnalyzer, 3);
 
     Input keys[] = new Input[] {
       new Input("a bob for apples", 10, new BytesRef("foobaz")),
@@ -439,14 +369,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
   }
 
   public void testEmptyAtStart() throws Exception {
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(new Input[0]));
     suggester.add(new BytesRef("a penny saved is a penny earned"), 10, new BytesRef("foobaz"));
     suggester.add(new BytesRef("lend me your ear"), 8, new BytesRef("foobar"));
@@ -483,14 +407,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
   }
 
   public void testBothExactAndPrefix() throws Exception {
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(new Input[0]));
     suggester.add(new BytesRef("the pen is pretty"), 10, new BytesRef("foobaz"));
     suggester.refresh();
@@ -563,12 +481,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       System.out.println("  minPrefixChars=" + minPrefixChars);
     }
 
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newFSDirectory(path);
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
 
     // Initial suggester built with nothing:
     suggester.build(new InputArrayIterator(new Input[0]));
@@ -648,12 +561,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
         }
         lookupThread.finish();
         suggester.close();
-        suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
-            @Override
-            protected Directory getDirectory(File path) {
-              return newFSDirectory(path);
-            }
-          };
+        suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
         lookupThread = new LookupThread(suggester);
         lookupThread.start();
 
@@ -824,15 +732,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
       new Input("lend me your ear", 8, new BytesRef("foobar")),
     };
 
-    File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
-
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
-        @Override
-        protected Directory getDirectory(File path) {
-          return newDirectory();
-        }
-      };
+    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
     suggester.build(new InputArrayIterator(keys));
 
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
index 71ac3df23d0..89c9629a675 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
@@ -49,15 +48,10 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     File tempDir = TestUtil.getTempDir("BlendedInfixSuggesterTest");
 
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
-        BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
-        BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
+                                                                BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
+                                                                BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
     suggester.build(new InputArrayIterator(keys));
 
     // we query for star wars and check that the weight
@@ -94,12 +88,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // BlenderType.LINEAR is used by default (remove position*10%)
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a);
     suggester.build(new InputArrayIterator(keys));
 
     assertEquals(w, getInResults(suggester, "top", pl, 1));
@@ -109,13 +98,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     suggester.close();
 
     // BlenderType.RECIPROCAL is using 1/(1+p) * w where w is weight and p the position of the word
-    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
     suggester.build(new InputArrayIterator(keys));
 
     assertEquals(w, getInResults(suggester, "top", pl, 1));
@@ -145,13 +129,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // if factor is small, we don't get the expected element
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
 
     suggester.build(new InputArrayIterator(keys));
 
@@ -169,13 +148,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     suggester.close();
 
     // if we increase the factor we have it
-    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
     suggester.build(new InputArrayIterator(keys));
 
     // we have it
@@ -205,14 +179,9 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // if factor is small, we don't get the expected element
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
-        BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
-      @Override
-      protected Directory getDirectory(File path) {
-        return newFSDirectory(path);
-      }
-    };
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
+                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
+                                                                BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
     suggester.build(new InputArrayIterator(keys));
 
 

From cee0e37635f042286f555e660caa417f8186e48b Mon Sep 17 00:00:00 2001
From: Michael McCandless 
Date: Thu, 6 Mar 2014 17:20:45 +0000
Subject: [PATCH 29/38] LUCENE-5493: fix solr

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574969 13f79535-47bb-0310-9956-ffa450edef68
---
 .../spelling/suggest/fst/AnalyzingInfixLookupFactory.java  | 4 +++-
 .../spelling/suggest/fst/BlendedInfixLookupFactory.java    | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
index f09c089d743..a11d6d22361 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
 import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
@@ -90,7 +91,8 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
 
     try {
       return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion, 
-          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
+                                         FSDirectory.open(new File(indexPath)), indexAnalyzer,
+                                         queryAnalyzer, minPrefixChars);
     } catch (IOException e) {
      throw new RuntimeException(e);
     }
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
index 1662913c694..7c20b5645a5 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
@@ -23,8 +23,9 @@ import java.io.IOException;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
-import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
 import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType;
+import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
@@ -94,7 +95,9 @@ public class BlendedInfixLookupFactory extends AnalyzingInfixLookupFactory {
     
     try {
       return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion, 
-          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor);
+                                       FSDirectory.open(new File(indexPath)),
+                                       indexAnalyzer, queryAnalyzer, minPrefixChars,
+                                       blenderType, numFactor);
     } catch (IOException e) {
      throw new RuntimeException(e);
     }

From 740034cdc182974779dab1201a3f4abf1700f339 Mon Sep 17 00:00:00 2001
From: Robert Muir 
Date: Thu, 6 Mar 2014 17:27:19 +0000
Subject: [PATCH 30/38] LUCENE-5493: add CHANGES

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574972 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5f4511a6d8c..7e2fc66352e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -99,6 +99,10 @@ New Features
 * LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter.
   (Robert Muir)
 
+* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector
+  support arbitrary Sort specifications.  
+  (Robert Muir, Mike McCandless, Adrien Grand)
+
 API Changes
 
 * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
@@ -106,6 +110,12 @@ API Changes
 
 * LUCENE-5468: Move offline Sort (from suggest module) to OfflineSort. (Robert Muir)
 
+* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector take
+  Sort instead of Sorter. BlockJoinSorter is removed, replaced with 
+  BlockJoinComparatorSource, which can take a Sort for ordering of parents
+  and a separate Sort for ordering of children within a block. 
+  (Robert Muir, Mike McCandless, Adrien Grand)
+
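[Editor's note: a hedged sketch of the Sort-based API these entries describe; the
"timestamp" field, analyzer, searcher, and query are assumed context.]

    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));

    // Index time: segments produced by merges come out sorted.
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sort));

    // Query time: stop collecting early inside segments sorted by the same Sort.
    TopFieldCollector topDocs = TopFieldCollector.create(sort, 10, true, false, false, false);
    searcher.search(query, new EarlyTerminatingSortingCollector(topDocs, sort, 10));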
 Optimizations
 
 * LUCENE-5468: HunspellStemFilter uses 10 to 100x less RAM. It also loads

From 6890323868b0e615f46d913ae988940cb0163096 Mon Sep 17 00:00:00 2001
From: Robert Muir 
Date: Thu, 6 Mar 2014 19:04:40 +0000
Subject: [PATCH 31/38] LUCENE-5493: javadocs cleanups

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1575008 13f79535-47bb-0310-9956-ffa450edef68
---
 .../sorter/BlockJoinComparatorSource.java     | 13 +++++----
 .../EarlyTerminatingSortingCollector.java     | 28 ++++++++++---------
 .../index/sorter/SortingMergePolicy.java      | 19 +++++++------
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
index 3029bcab656..6d5ff0bdd89 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
@@ -24,6 +24,9 @@ import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.FieldComparatorSource;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher; // javadocs
+import org.apache.lucene.search.Query; // javadocs
+import org.apache.lucene.search.ScoreDoc; // javadocs
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
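[Editor's note: a hedged sketch of block-join sorting with this class, assuming the
two-Sort constructor described in the CHANGES entry above. The "parent" marker field
and sort fields are hypothetical, and the parents filter must produce fixed bit sets
(e.g. the join module's FixedBitSetCachingWrapperFilter).]

    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    Sort parentSort = new Sort(new SortField("title", SortField.Type.STRING));
    Sort childSort = new Sort(new SortField("price", SortField.Type.LONG));
    SortField blockSortField = new SortField("blockjoin",
        new BlockJoinComparatorSource(parentsFilter, parentSort, childSort));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), new Sort(blockSortField)));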
@@ -32,12 +35,12 @@ import org.apache.lucene.util.FixedBitSet;
 /**
  * Helper class to sort readers that contain blocks of documents.
  * 

- * Note that this currently has some limitations:
+ * Note that this class is intended to be used with {@link SortingMergePolicy},
+ * and for other purposes has some limitations:
  * <ul>
- *   <li>Cannot yet be used with IndexSearcher.searchAfter
- *   <li>Filling sort value fields is not yet supported.
+ *   <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
+ *   <li>Filling sort field values is not yet supported.
  * </ul>
- * Its intended to be used with {@link SortingMergePolicy}. */ // TODO: can/should we clean this thing up (e.g. return a proper sort value) // and move to the join/ module? @@ -160,7 +163,7 @@ public class BlockJoinComparatorSource extends FieldComparatorSource { @Override public Integer value(int slot) { // really our sort "value" is more complex... - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("filling sort field values is not yet supported"); } @Override diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java index fa032edc462..23772e18f23 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java @@ -34,41 +34,43 @@ import org.apache.lucene.search.TotalHitCountCollector; * {@link Sort}. * *
- * NOTE: the {@link Collector} detects sorted segments according to
+ * NOTE: the {@code Collector} detects sorted segments according to
  * {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
- * it collects up to a specified num docs from each segment, and therefore is
- * mostly suitable for use in conjunction with collectors such as
+ * it collects up to a specified {@code numDocsToCollect} from each segment,
+ * and therefore is mostly suitable for use in conjunction with collectors such as
  * {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
  * <p>
- * NOTE: If you wrap a {@link TopDocsCollector} that sorts in the same
- * order as the index order, the returned {@link TopDocsCollector#topDocs()}
+ * NOTE: If you wrap a {@code TopDocsCollector} that sorts in the same
+ * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
  * will be correct. However the total of {@link TopDocsCollector#getTotalHits()
  * hit count} will be underestimated since not all matching documents will have
  * been collected.
  * <p>
- * NOTE: This {@link Collector} uses {@link Sort#toString()} to detect
- * whether a segment was sorted with the same {@link Sort} as the one given in
- * {@link #EarlyTerminatingSortingCollector(Collector, Sort, int)}. This has
+ * NOTE: This {@code Collector} uses {@link Sort#toString()} to detect
+ * whether a segment was sorted with the same {@code Sort}. This has
  * two implications:
  * <ul>
  * <li>if a custom comparator is not implemented correctly and returns
  * different identifiers for equivalent instances, this collector will not
  * detect sorted segments,</li>
  * <li>if you suddenly change the {@link IndexWriter}'s
- * {@link SortingMergePolicy} to sort according to another criterion and if both
- * the old and the new {@link Sort}s have the same identifier, this
- * {@link Collector} will incorrectly detect sorted segments.</li>
+ * {@code SortingMergePolicy} to sort according to another criterion and if both
+ * the old and the new {@code Sort}s have the same identifier, this
+ * {@code Collector} will incorrectly detect sorted segments.</li>
  * </ul>
* * @lucene.experimental */ public class EarlyTerminatingSortingCollector extends Collector { - + /** The wrapped Collector */ protected final Collector in; + /** Sort used to sort the search results */ protected final Sort sort; + /** Number of documents to collect in each segment */ protected final int numDocsToCollect; - + /** Number of documents to collect in the current segment being processed */ protected int segmentTotalCollect; + /** True if the current segment being processed is sorted by {@link #sort} */ protected boolean segmentSorted; private int numCollected; diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java index 58263407e5d..8b11b689fd9 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import org.apache.lucene.analysis.Analyzer; // javadocs import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -42,14 +43,14 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; * before merging them. As a consequence, all segments resulting from a merge * will be sorted while segments resulting from a flush will be in the order * in which documents have been added. - *
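[editorial aside, not part of the committed patch] Per the javadoc above, the collector is handed the same Sort the index was written with, wrapping a TopDocsCollector. A minimal hedged sketch, assuming an IndexSearcher named searcher, a Query named query, and a "timestamp" field (all hypothetical):

    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG)); // must match the index-time sort
    TopFieldCollector topDocs = TopFieldCollector.create(sort, 10, true, false, false, false);
    searcher.search(query, new EarlyTerminatingSortingCollector(topDocs, sort, 10));
    // topDocs.topDocs() is correct; topDocs.getTotalHits() may be underestimated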

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
index 58263407e5d..8b11b689fd9 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.analysis.Analyzer; // javadocs
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -42,14 +43,14 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
  * before merging them. As a consequence, all segments resulting from a merge
  * will be sorted while segments resulting from a flush will be in the order
  * in which documents have been added.
- * <p><b>NOTE</b>: Never use this {@link MergePolicy} if you rely on
- * {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
+ * <p><b>NOTE</b>: Never use this policy if you rely on
+ * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
  * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
- * <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
- * {@link Sort}s so that the order of segments is predictable. For example,
- * using {@link SortingMergePolicy} with {@link Sort#INDEXORDER in reverse} (which is
- * not idempotent) will make the order of documents in a segment depend on the
- * number of times the segment has been merged.
+ * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
+ * so that the order of segments is predictable. For example, using
+ * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
+ * the order of documents in a segment depend on the number of times the segment
+ * has been merged.
  * @lucene.experimental
  */
 public final class SortingMergePolicy extends MergePolicy {
@@ -148,7 +149,7 @@ public final class SortingMergePolicy extends MergePolicy {
 
   }
 
-  /** Returns true if the given reader is sorted by the given sort. */
+  /** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
   public static boolean isSorted(AtomicReader reader, Sort sort) {
     if (reader instanceof SegmentReader) {
       final SegmentReader segReader = (SegmentReader) reader;
@@ -175,7 +176,7 @@ public final class SortingMergePolicy extends MergePolicy {
   final Sorter sorter;
   final Sort sort;
 
-  /** Create a new {@link MergePolicy} that sorts documents with sort. */
+  /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
   public SortingMergePolicy(MergePolicy in, Sort sort) {
     this.in = in;
     this.sorter = new Sorter(sort);
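[editorial aside, not part of the committed patch] The isSorted helper above can be used to check, segment by segment, which parts of an index have already been merged into the target order. A rough hedged sketch, assuming a DirectoryReader named reader and the index-time Sort named sort:

    for (AtomicReaderContext ctx : reader.leaves()) {
      if (!SortingMergePolicy.isSorted(ctx.reader(), sort)) {
        // a flushed (not yet merged) segment: documents are still in insertion order
      }
    }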

From c16165e760969d5c4571551641128e191f3f7357 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Thu, 6 Mar 2014 19:15:25 +0000
Subject: [PATCH 32/38] LUCENE-5493: add missing experimental tag

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1575017 13f79535-47bb-0310-9956-ffa450edef68
---
 .../apache/lucene/index/sorter/BlockJoinComparatorSource.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
index 6d5ff0bdd89..af91463b297 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/BlockJoinComparatorSource.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.FixedBitSet;
  * <ul>
  *   <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
  *   <li>Filling sort field values is not yet supported.
  * </ul>
+ * @lucene.experimental
  */
 // TODO: can/should we clean this thing up (e.g. return a proper sort value)
 // and move to the join/ module?

From 7f695434f44ff2718b7f85a1fd88ae848d766a4c Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Thu, 6 Mar 2014 19:53:10 +0000
Subject: [PATCH 33/38] disable slow solr tests in smoketester

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575024 13f79535-47bb-0310-9956-ffa450edef68
---
 dev-tools/scripts/smokeTestRelease.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py
index b1a5953232c..c56f696b3c5 100644
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@@ -731,7 +731,7 @@ def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs
     os.chdir('solr')
     print("  run tests w/ Java 7 and testArgs='%s'..." % testArgs)
-    run('%s; ant clean test %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
+    run('%s; ant clean test -Dtests.slow=false %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
 
     # test javadocs
     print('  generate javadocs w/ Java 7...')

From 1e02da264569cb09ceefdc1bdececc2c61e75673 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Fri, 7 Mar 2014 02:46:49 +0000
Subject: [PATCH 34/38] add thunderbird version of TestAllDictionaries

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575126 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hunspell/TestAllDictionaries2.java        | 219 ++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java

diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
new file mode 100644
index 00000000000..d0a83561802
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
@@ -0,0 +1,219 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import org.apache.lucene.analysis.hunspell.Dictionary;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.junit.Ignore;
+
+/**
+ * These thunderbird dictionaries can be retrieved via:
+ * https://addons.mozilla.org/en-US/thunderbird/language-tools/
+ * You must click and download every file: sorry!
+ */
+@Ignore("enable manually")
+public class TestAllDictionaries2 extends LuceneTestCase {
+
+  // set this to the location of where you downloaded all the files
+  static final File DICTIONARY_HOME =
+      new File("/data/thunderbirdDicts");
+
+  final String tests[] = {
+    /* zip file */ /* dictionary */ /* affix */
+    "addon-0.4.5-an+fx+tb+fn+sm.xpi", "dictionaries/ru.dic", "dictionaries/ru.aff",
+    "addon-0.5.5-fx+tb.xpi", "dictionaries/ko-KR.dic", "dictionaries/ko-KR.aff",
+    "afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
+    "albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
+    "amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
+//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
+//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
+    "azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
+    "belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
+    "belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
+    "bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi", "dictionaries/bn-BD.dic", "dictionaries/bn-BD.aff",
+    "brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi", "dictionaries/pt-BR-antigo.dic", "dictionaries/pt-BR-antigo.aff",
+    "brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi", "dictionaries/pt-BR.dic", "dictionaries/pt-BR.aff",
+    "british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi", "dictionaries/en-GB.dic", "dictionaries/en-GB.aff",
+    "bulgarian_dictionary-4.3-fx+tb+sm.xpi", "dictionaries/bg.dic", "dictionaries/bg.aff",
+    "canadian_english_dictionary-2.0.8-fx+sm+tb.xpi", "dictionaries/en-CA.dic", "dictionaries/en-CA.aff",
+    "ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
+    "chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
+    "corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
+    "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
+    "corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
+    "croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
+    "croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
+    "dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi", "dictionaries/da.dic", "dictionaries/da.aff",
+    "deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi", "dictionaries/de-DE-1901.dic", "dictionaries/de-DE-1901.aff",
+    "diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi", "dictionaries/es-ES.dic", "dictionaries/es-ES.aff",
+    "diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi", "dictionaries/es_VE.dic", "dictionaries/es_VE.aff",
+    "diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
+    "diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
+    "diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
+//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
+    "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
+    "difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
+//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
+    "dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
+    "dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
+    "eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
+    "english_australian_dictionary-2.1.2-tb+fx+sm.xpi", "dictionaries/en-AU.dic", "dictionaries/en-AU.aff",
+    "esperanta_vortaro-1.0.2-fx+tb+sm.xpi", "dictionaries/eo-EO.dic", "dictionaries/eo-EO.aff",
+    "european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi", "dictionaries/pt-PT.dic", "dictionaries/pt-PT.aff",
+    "faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi", "dictionaries/fo_FO.dic", "dictionaries/fo_FO.aff",
+    "frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi", "dictionaries/fy.dic", "dictionaries/fy.aff",
+    "geiriadur_cymraeg-1.08-tb+sm+fx.xpi", "dictionaries/cy_GB.dic", "dictionaries/cy_GB.aff",
+    "general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi", "dictionaries/ca.dic", "dictionaries/ca.aff",
+    "german_dictionary-2.0.3-fn+fx+sm+tb.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
+    "german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
+    "german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
+    "german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
+    "german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
+    "german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
+    "greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi", "dictionaries/el-GR.dic", "dictionaries/el-GR.aff",
+    "gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/gu_IN.dic", "dictionaries/gu_IN.aff",
+    "haitian_creole_spell_checker-0.08-tb+sm+fx.xpi", "dictionaries/ht-HT.dic", "dictionaries/ht-HT.aff",
+    "hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
+    "hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
+    "hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
+//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
+//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
+    "kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
+//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
+    "kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
+    "kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
+    "kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
+    "lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi", "dictionaries/lo_LA.dic", "dictionaries/lo_LA.aff",
+    "latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi", "dictionaries/lv_LV.dic", "dictionaries/lv_LV.aff",
+    "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
+    "litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
+    "litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
+//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
+//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
+    "malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
+    "marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
+    "ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
+    "nepali_dictionary-1.2-fx+tb.xpi", "dictionaries/ne_NP.dic", "dictionaries/ne_NP.aff",
+    "norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi", "dictionaries/nb.dic", "dictionaries/nb.aff",
+    "norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi", "dictionaries/nn.dic", "dictionaries/nn.aff",
+    "northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nso-ZA.dic", "dictionaries/nso-ZA.aff",
+    "oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
+    "polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
+    "punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
+//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
+//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
+    "sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
+    "scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
+    "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
+    "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Latn.dic", "dictionaries/sr-RS-Latn.aff",
+    "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK.dic", "dictionaries/sk-SK.aff",
+    "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK-ascii.dic", "dictionaries/sk-SK-ascii.aff",
+    "slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff",
+    "songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff",
+    "southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff",
+    "sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
+    "sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff",
+    "srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/Srpski_latinica.dic", "dictionaries/Srpski_latinica.aff",
+    "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff",
+    "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv_FI.dic", "dictionaries/sv_FI.aff",
+    "swati_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ss-ZA.dic", "dictionaries/ss-ZA.aff",
+    "tamil_spell_checker_for_firefox-0.4-tb+fx.xpi", "dictionaries/ta-TA.dic", "dictionaries/ta-TA.aff",
+    "telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
+    "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
+    "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
+//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
+    "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
+    "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
+    "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
+//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
+    "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
+    "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
+    "upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
+//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
+    "uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
+    "valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
+    "venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
+    "verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
+    "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
+    "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
+//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
+    "xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
+    "xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
+    "yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
+    "zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff"
+  };
+
+  public void test() throws Exception {
+    for (int i = 0; i < tests.length; i += 3) {
+      File f = new File(DICTIONARY_HOME, tests[i]);
+      assert f.exists();
+
+      try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+        ZipEntry dicEntry = zip.getEntry(tests[i+1]);
+        assert dicEntry != null;
+        ZipEntry affEntry = zip.getEntry(tests[i+2]);
+        assert affEntry != null;
+
+        try (InputStream dictionary = zip.getInputStream(dicEntry);
+             InputStream affix = zip.getInputStream(affEntry)) {
+          Dictionary dic = new Dictionary(affix, dictionary);
+          System.out.println(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" +
+                             "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " +
+                             "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " +
+                             "strips=" + RamUsageEstimator.humanSizeOf(dic.stripLookup) + ", " +
+                             "conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " +
+                             "affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " +
+                             "prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " +
+                             "suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
+        }
+      }
+    }
+  }
+
+  public void testOneDictionary() throws Exception {
+    String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
+    for (int i = 0; i < tests.length; i++) {
+      if (tests[i].equals(toTest)) {
+        File f = new File(DICTIONARY_HOME, tests[i]);
+        assert f.exists();
+
+        try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+          ZipEntry dicEntry = zip.getEntry(tests[i+1]);
+          assert dicEntry != null;
+          ZipEntry affEntry = zip.getEntry(tests[i+2]);
+          assert affEntry != null;
+
+          try (InputStream dictionary = zip.getInputStream(dicEntry);
+               InputStream affix = zip.getInputStream(affEntry)) {
+            new Dictionary(affix, dictionary);
+          }
+        }
+      }
+    }
+  }
+}
From b87af547745b150ff6ca0c8af984cd0bb06b3704 Mon Sep 17 00:00:00 2001
From: Joel Bernstein
Date: Fri, 7 Mar 2014 14:20:48 +0000
Subject: [PATCH 35/38] SOLR-5720: Updated CHANGES.txt

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575266 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 4d10404fc78..f7409a97f52 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -100,6 +100,10 @@ New Features
 * SOLR-5714: You can now use one pool of memory for for the HDFS block cache
   that all collections share. (Mark Miller, Gregory Chanan)
 
+* SOLR-5720: Add ExpandComponent to expand results collapsed by the
+  CollapsingQParserPlugin. (Joel Bernstein)
+
+
 Bug Fixes
 ----------------------

From 55edc565d8f192f8a349611f8ca827610b313148 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Fri, 7 Mar 2014 16:12:00 +0000
Subject: [PATCH 36/38] LUCENE-5500: SortingMergePolicy should error if the Sort refers to the score

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575306 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/core/src/java/org/apache/lucene/search/Sort.java  | 4 ++--
 .../src/java/org/apache/lucene/index/sorter/Sorter.java  | 3 +++
 .../lucene/index/sorter/SortingAtomicReaderTest.java     | 9 +++++++++
 .../lucene/index/sorter/TestSortingMergePolicy.java      | 9 +++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/Sort.java b/lucene/core/src/java/org/apache/lucene/search/Sort.java
index 0650d37fe33..57630635d20 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Sort.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Sort.java
@@ -202,8 +202,8 @@ public class Sort {
     return 0x45aaf665 + Arrays.hashCode(fields);
   }
 
-  /** Whether the relevance score is needed to sort documents. */
-  boolean needsScores() {
+  /** Returns true if the relevance score is needed to sort documents. */
+  public boolean needsScores() {
     for (SortField sortField : fields) {
       if (sortField.needsScores()) {
         return true;

diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
index d32785f8876..608b072237a 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
@@ -39,6 +39,9 @@ final class Sorter {
 
   /** Creates a new Sorter to sort the index with {@code sort} */
   Sorter(Sort sort) {
+    if (sort.needsScores()) {
+      throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
+    }
     this.sort = sort;
   }

diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
index 89d6403619b..bb75fbcb62d 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
+++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
@@ -62,5 +62,14 @@ public class SortingAtomicReaderTest extends SorterTestBase {
 
     TestUtil.checkReader(reader);
   }
+
+  public void testBadSort() throws Exception {
+    try {
+      SortingAtomicReader.wrap(reader, Sort.RELEVANCE);
+      fail("Didn't get expected exception");
+    } catch (IllegalArgumentException e) {
+      assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
+    }
+  }
 
 }

diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
index 47fb654d3ef..5095aeca299 100644
--- a/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
+++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
@@ -172,5 +172,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
 
     assertReaderEquals("", sortedReader1, sortedReader2);
  }
+
+  public void testBadSort() throws Exception {
+    try {
+      new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
+      fail("Didn't get expected exception");
+    } catch (IllegalArgumentException e) {
+      assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
+    }
+  }
 
 }
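[editorial aside, not part of the committed patch] With needsScores() now public, callers can check a Sort themselves before handing it to the index-sorting components. A small hedged sketch:

    Sort byScore = new Sort(SortField.FIELD_SCORE);
    if (byScore.needsScores()) {
      // Sorter/SortingMergePolicy will throw IllegalArgumentException for this sort:
      // an index cannot be permanently sorted by a query-dependent relevance score
    }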
From 36edbb84ea0431bd27debd3ed6460c4b33f3ec23 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Fri, 7 Mar 2014 17:09:27 +0000
Subject: [PATCH 37/38] unescape %20 in urls so we don't get false failures with 1.7.0_60

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575328 13f79535-47bb-0310-9956-ffa450edef68
---
 dev-tools/scripts/checkJavaDocs.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dev-tools/scripts/checkJavaDocs.py b/dev-tools/scripts/checkJavaDocs.py
index e68f6072493..4089a8f15b5 100644
--- a/dev-tools/scripts/checkJavaDocs.py
+++ b/dev-tools/scripts/checkJavaDocs.py
@@ -212,7 +212,7 @@ def checkClassSummaries(fullPath):
     if inThing:
       if lineLower.find('</tr>') != -1:
         if not hasDesc:
-          missing.append((lastCaption, lastItem))
+          missing.append((lastCaption, unEscapeURL(lastItem)))
         inThing = False
         continue
       else:
@@ -298,6 +298,11 @@ def checkSummary(fullPath):
   f.close()
   return anyMissing
 
+def unEscapeURL(s):
+  # Not exhaustive!!
+  s = s.replace('%20', ' ')
+  return s
+
 def unescapeHTML(s):
   s = s.replace('&lt;', '<')
   s = s.replace('&gt;', '>')

From 26c79531b0eabeb0b7a1ab0dfa39a2318f8ee631 Mon Sep 17 00:00:00 2001
From: Ryan Ernst
Date: Fri, 7 Mar 2014 18:01:52 +0000
Subject: [PATCH 38/38] SOLR-5818: distrib search with custom comparator does not quite work correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575344 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt                              |  3 +
 .../handler/component/QueryComponent.java     | 79 ++++++++++++++++++-
 .../conf/schema-field-sort-values.xml         | 41 ++++++++++
 .../apache/solr/schema/WrappedIntField.java   | 46 +++++++++++
 .../solr/search/TestFieldSortValues.java      | 53 +++++++++++++
 5 files changed, 218 insertions(+), 4 deletions(-)
 create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml
 create mode 100644 solr/core/src/test/org/apache/solr/schema/WrappedIntField.java
 create mode 100644 solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f7409a97f52..decef0fa9f0 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -135,6 +135,9 @@ Bug Fixes
 * SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
   advertised leader in it's local state. (Timothy Potter, Mark Miller)
 
+* SOLR-5818: distrib search with custom comparator does not quite work correctly
+  (Ryan Ernst)
+
 Optimizations
 ----------------------
 * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index 43f3841434d..e2c5ba8e5cb 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -25,12 +25,14 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.grouping.GroupDocs;
 import org.apache.lucene.search.grouping.SearchGroup;
 import org.apache.lucene.search.grouping.TopGroups;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
@@ -500,12 +502,32 @@ public class QueryComponent extends SearchComponent
 
     // sort ids from lowest to highest so we can access them in order
     int nDocs = docList.size();
-    long[] sortedIds = new long[nDocs];
-    DocIterator it = rb.getResults().docList.iterator();
+    final long[] sortedIds = new long[nDocs];
+    final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
+    DocList docs = rb.getResults().docList;
+    DocIterator it = docs.iterator();
     for (int i=0; i<nDocs; i++) {
       sortedIds[i] = (((long) it.nextDoc()) << 32) | i;
+      scores[i] = docs.hasScores() ? it.score() : Float.NaN;
     }
-    Arrays.sort(sortedIds);
+
+    // sort ids and scores together
+    new InPlaceMergeSorter() {
+      @Override
+      protected void swap(int i, int j) {
+        long tmpId = sortedIds[i];
+        float tmpScore = scores[i];
+        sortedIds[i] = sortedIds[j];
+        scores[i] = scores[j];
+        sortedIds[j] = tmpId;
+        scores[j] = tmpScore;
+      }
+
+      @Override
+      protected int compare(int i, int j) {
+        return Long.compare(sortedIds[i], sortedIds[j]);
+      }
+    }.sort(0, sortedIds.length);
@@ -527,6 +549,8 @@ public class QueryComponent extends SearchComponent
 
-      for (long idAndPos : sortedIds) {
-        int doc = (int)(idAndPos >>> 32);
+      for (int i = 0; i < sortedIds.length; ++i) {
+        long idAndPos = sortedIds[i];
+        float score = scores[i];
+        int doc = (int)(idAndPos >>> 32);
         int position = (int)idAndPos;
@@ -546,6 +570,7 @@ public class QueryComponent extends SearchComponent
         }
 
         doc -= currentLeaf.docBase;  // adjust for what segment this is in
+        comparator.setScorer(new FakeScorer(doc, score));
        comparator.copy(0, doc);
         Object val = comparator.value(0);
         if (null != ft) val = ft.marshalSortValue(val);
@@ -1157,4 +1182,50 @@ public class QueryComponent extends SearchComponent
   public URL[] getDocs() {
     return null;
   }
+
+  /**
+   * Fake scorer for a single document
+   *
+   * TODO: when SOLR-5595 is fixed, this won't be needed, as we don't need to recompute sort values here from the comparator
+   */
+  private static class FakeScorer extends Scorer {
+    final int docid;
+    final float score;
+
+    FakeScorer(int docid, float score) {
+      super(null);
+      this.docid = docid;
+      this.score = score;
+    }
+
+    @Override
+    public int docID() {
+      return docid;
+    }
+
+    @Override
+    public float score() throws IOException {
+      return score;
+    }
+
+    @Override
+    public int freq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      return 1;
+    }
+  }
 }
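[editorial aside, not part of the committed patch] The FakeScorer idiom above is how a score that was captured during the main search gets replayed into a FieldComparator outside a live scoring pass. A condensed hedged sketch of the call pattern; leafContext, doc, and score are assumed to come from the surrounding loop:

    FieldComparator<?> comparator = sortField.getComparator(1, 0);
    comparator = comparator.setNextReader(leafContext);   // leafContext: the segment holding doc
    comparator.setScorer(new FakeScorer(doc, score));     // comparators that need scores read it here
    comparator.copy(0, doc);
    Object sortValue = comparator.value(0);               // no longer recomputed incorrectly for score-based sorts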
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml b/solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml
new file mode 100644
index 00000000000..22063d5542c
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-field-sort-values.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- (the XML markup of this 41-line test schema was stripped from this copy; only the two trailing values below survive) -->
+  <defaultSearchField>text</defaultSearchField>
+  <uniqueKey>id</uniqueKey>

diff --git a/solr/core/src/test/org/apache/solr/schema/WrappedIntField.java b/solr/core/src/test/org/apache/solr/schema/WrappedIntField.java
new file mode 100644
index 00000000000..7f52b3e3f4e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/schema/WrappedIntField.java
@@ -0,0 +1,46 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.expressions.Expression;
+import org.apache.lucene.expressions.SimpleBindings;
+import org.apache.lucene.expressions.js.JavascriptCompiler;
+import org.apache.lucene.search.SortField;
+
+/**
+ * Custom field wrapping an int, to test sorting via a custom comparator.
+ */
+public class WrappedIntField extends TrieIntField {
+  Expression expr;
+
+  public WrappedIntField() {
+    try {
+      expr = JavascriptCompiler.compile("payload % 3");
+    } catch (Exception e) {
+      throw new RuntimeException("impossible?", e);
+    }
+  }
+
+  @Override
+  public SortField getSortField(final SchemaField field, final boolean reverse) {
+    field.checkSortability();
+    SimpleBindings bindings = new SimpleBindings();
+    bindings.add(super.getSortField(field, reverse));
+    return expr.getSortField(bindings, reverse);
+  }
+}

diff --git a/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java b/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java
new file mode 100644
index 00000000000..e234ff7dc3e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java
@@ -0,0 +1,53 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+
+
+/**
+ * Test QueryComponent.doFieldSortValues
+ */
+@SuppressCodecs({"Lucene3x"})
+public class TestFieldSortValues extends SolrTestCaseJ4 {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-minimal.xml", "schema-field-sort-values.xml");
+  }
+
+  public void testCustomComparator() throws Exception {
+    clearIndex();
+    assertU(adoc(sdoc("id", "1", "payload", "2")));
+    assertU(adoc(sdoc("id", "2", "payload", "3")));
+    assertU(adoc(sdoc("id", "3", "payload", "1")));
+    assertU(adoc(sdoc("id", "4", "payload", "5")));
+    assertU(adoc(sdoc("id", "5", "payload", "4")));
+    assertU(commit());
+
+    // payload is backed by a custom sort field which returns the payload value mod 3
+    assertQ(req("q", "*:*", "fl", "id", "sort", "payload asc, id asc", "fsv", "true")
+        , "//result/doc[int='2' and position()=1]"
+        , "//result/doc[int='3' and position()=2]"
+        , "//result/doc[int='5' and position()=3]"
+        , "//result/doc[int='1' and position()=4]"
+        , "//result/doc[int='4' and position()=5]");
+  }
+}
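[editorial closing note, not part of the committed patch] The new test schema above lost its markup in this copy. Judging from WrappedIntField and TestFieldSortValues, it presumably declares a field type backed by the custom class and an indexed "payload" field of that type, along these lines (a hedged sketch, not the committed file):

    <fieldType name="wrapped_int" class="org.apache.solr.schema.WrappedIntField"/>
    <field name="id" type="string" indexed="true" stored="true"/>
    <field name="payload" type="wrapped_int" indexed="true" stored="true"/>
    <defaultSearchField>text</defaultSearchField>
    <uniqueKey>id</uniqueKey>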