diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c1c8c837ad9..39c875604e2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -70,6 +70,9 @@ New Features This is necessary when using collectors like ToParentBlockJoinCollector with DrillSideways. (Mike McCandless) +* SOLR-4761: Add SimpleMergedSegmentWarmer, which just initializes terms, + norms, docvalues, and so on. (Mark Miller, Mike McCandless, Robert Muir) + ======================= Lucene 4.3.0 ======================= Changes in backwards compatibility policy diff --git a/lucene/core/src/java/org/apache/lucene/index/SimpleMergedSegmentWarmer.java b/lucene/core/src/java/org/apache/lucene/index/SimpleMergedSegmentWarmer.java new file mode 100644 index 00000000000..82d3045d51c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/SimpleMergedSegmentWarmer.java @@ -0,0 +1,90 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; +import org.apache.lucene.util.InfoStream; + +/** + * A very simple merged segment warmer that just ensures + * data structures are initialized. + */ +public class SimpleMergedSegmentWarmer extends IndexReaderWarmer { + private final InfoStream infoStream; + + /** + * Creates a new SimpleMergedSegmentWarmer + * @param infoStream InfoStream to log statistics about warming. + */ + public SimpleMergedSegmentWarmer(InfoStream infoStream) { + this.infoStream = infoStream; + } + + @Override + public void warm(AtomicReader reader) throws IOException { + long startTime = System.currentTimeMillis(); + int indexedCount = 0; + int docValuesCount = 0; + int normsCount = 0; + for (FieldInfo info : reader.getFieldInfos()) { + if (info.isIndexed()) { + reader.terms(info.name); + indexedCount++; + + if (info.hasNorms()) { + reader.getNormValues(info.name); + normsCount++; + } + } + + if (info.hasDocValues()) { + switch(info.getDocValuesType()) { + case NUMERIC: + reader.getNumericDocValues(info.name); + break; + case BINARY: + reader.getBinaryDocValues(info.name); + break; + case SORTED: + reader.getSortedDocValues(info.name); + break; + case SORTED_SET: + reader.getSortedSetDocValues(info.name); + break; + default: + assert false; // unknown dv type + } + docValuesCount++; + } + } + + reader.document(0); + reader.getTermVectors(0); + + if (infoStream.isEnabled("SMSW")) { + infoStream.message("SMSW", + "Finished warming segment: " + reader + + ", indexed=" + indexedCount + + ", docValues=" + docValuesCount + + ", norms=" + normsCount + + ", time=" + (System.currentTimeMillis() - startTime)); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java index 99307334e33..94cb8d61b4c 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -39,6 +39,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util._TestUtil; @@ -965,6 +966,46 @@ public class TestIndexWriterReader extends LuceneTestCase { assertTrue(didWarm.get()); } + public void testSimpleMergedSegmentWramer() throws Exception { + Directory dir = newDirectory(); + final AtomicBoolean didWarm = new AtomicBoolean(); + InfoStream infoStream = new InfoStream() { + @Override + public void close() throws IOException {} + + @Override + public void message(String component, String message) { + if ("SMSW".equals(component)) { + didWarm.set(true); + } + } + + @Override + public boolean isEnabled(String component) { + return true; + } + }; + IndexWriter w = new IndexWriter( + dir, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())). + setMaxBufferedDocs(2). + setReaderPooling(true). + setInfoStream(infoStream). + setMergedSegmentWarmer(new SimpleMergedSegmentWarmer(infoStream)). + setMergePolicy(newLogMergePolicy(10)) + ); + + Document doc = new Document(); + doc.add(newStringField("foo", "bar", Field.Store.NO)); + for(int i=0;i<20;i++) { + w.addDocument(doc); + } + w.waitForMerges(); + w.close(); + dir.close(); + assertTrue(didWarm.get()); + } + public void testNoTermsIndex() throws Exception { // Some Codecs don't honor the ReaderTermsIndexDivisor, so skip the test if // they're picked. diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 1efa453817f..67900210ef9 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -774,6 +774,9 @@ public abstract class LuceneTestCase extends Assert { } else { c.setMergePolicy(newLogMergePolicy()); } + if (rarely(r)) { + c.setMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.getInfoStream())); + } c.setReaderPooling(r.nextBoolean()); c.setReaderTermsIndexDivisor(_TestUtil.nextInt(r, 1, 4)); return c; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 21c5725a5b4..31dbdb55252 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -59,6 +59,9 @@ New Features * SOLR-3251: Dynamically add fields to schema. (Steve Rowe, Robert Muir, yonik) +* SOLR-4761: Add option to plugin a merged segment warmer into solrconfig.xml + (Mark Miller, Mike McCandless, Robert Muir) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java index 90be8f04d8b..e609844d481 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java @@ -142,11 +142,10 @@ public class SolrConfig extends Config { defaultIndexConfig = mainIndexConfig = null; indexConfigPrefix = "indexConfig"; } + reopenReaders = getBool(indexConfigPrefix+"/reopenReaders", true); // Parse indexConfig section, using mainIndex as backup in case old config is used indexConfig = new SolrIndexConfig(this, "indexConfig", mainIndexConfig); - - reopenReaders = getBool(indexConfigPrefix+"/reopenReaders", true); - + booleanQueryMaxClauseCount = getInt("query/maxBooleanClauses", BooleanQuery.getMaxClauseCount()); log.info("Using Lucene MatchVersion: " + luceneMatchVersion); diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java index 983eddfdea0..6beb7b4a997 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -18,6 +18,8 @@ package org.apache.solr.update; import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; +import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -55,6 +57,8 @@ public class SolrIndexConfig { public final PluginInfo mergeSchedulerInfo; public final int termIndexInterval; + public final PluginInfo mergedSegmentWarmerInfo; + public String infoStreamFile = null; // Available lock types @@ -81,6 +85,7 @@ public class SolrIndexConfig { mergePolicyInfo = null; mergeSchedulerInfo = null; defaultMergePolicyClassName = TieredMergePolicy.class.getName(); + mergedSegmentWarmerInfo = null; } /** @@ -135,6 +140,11 @@ public class SolrIndexConfig { infoStreamFile= solrConfig.get(prefix + "/infoStream/@file", null); log.info("IndexWriter infoStream debug log is enabled: " + infoStreamFile); } + + mergedSegmentWarmerInfo = getPluginInfo(prefix + "/mergedSegmentWarmer", solrConfig, def.mergedSegmentWarmerInfo); + if (mergedSegmentWarmerInfo != null && solrConfig.reopenReaders == false) { + throw new IllegalArgumentException("Supplying a mergedSegmentWarmer will do nothing since reopenReaders is false"); + } } /* @@ -182,6 +192,16 @@ public class SolrIndexConfig { if (maxIndexingThreads != -1) { iwc.setMaxThreadStates(maxIndexingThreads); } + + if (mergedSegmentWarmerInfo != null) { + // TODO: add infostream -> normal logging system (there is an issue somewhere) + IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className, + IndexReaderWarmer.class, + null, + new Class[] { InfoStream.class }, + new Object[] { InfoStream.NO_OUTPUT }); + iwc.setMergedSegmentWarmer(warmer); + } return iwc; } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml new file mode 100644 index 00000000000..3e1a46545be --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml @@ -0,0 +1,31 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java index a601dd46bec..247b3523917 100644 --- a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java +++ b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java @@ -19,6 +19,8 @@ package org.apache.solr.update; import java.io.File; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SimpleMergedSegmentWarmer; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.SolrConfig; import org.apache.solr.schema.IndexSchema; @@ -55,5 +57,18 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 { IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig); solrIndexConfig.toIndexWriterConfig(indexSchema); } + + public void testMergedSegmentWarmerIndexConfigCreation() throws Exception { + SolrConfig solrConfig = new SolrConfig("solr" + File.separator + + "collection1", "solrconfig-warmer.xml", null); + SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null); + assertNotNull(solrIndexConfig); + assertNotNull(solrIndexConfig.mergedSegmentWarmerInfo); + assertEquals(SimpleMergedSegmentWarmer.class.getName(), + solrIndexConfig.mergedSegmentWarmerInfo.className); + IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig); + IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema); + assertEquals(SimpleMergedSegmentWarmer.class, iwc.getMergedSegmentWarmer().getClass()); + } }