From 9b936ce650f4ed98dc6fc7d47351952caac121d0 Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Sat, 12 Sep 2009 12:27:42 +0000 Subject: [PATCH] SOLR-1296: Add support for termInfosIndexDivisor and termIndexInterval git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@814160 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 4 +- example/solr/conf/solrconfig.xml | 15 +- .../apache/solr/core/IndexReaderFactory.java | 19 +- .../solr/core/StandardIndexReaderFactory.java | 5 +- .../apache/solr/update/SolrIndexConfig.java | 4 + .../apache/solr/update/SolrIndexWriter.java | 4 + .../solr/core/IndexReaderFactoryTest.java | 48 ++ src/test/org/apache/solr/core/TestConfig.java | 59 ++- .../solr/conf/solrconfig-termindex.xml | 459 ++++++++++++++++++ 9 files changed, 602 insertions(+), 15 deletions(-) create mode 100644 src/test/org/apache/solr/core/IndexReaderFactoryTest.java create mode 100644 src/test/test-files/solr/conf/solrconfig-termindex.xml diff --git a/CHANGES.txt b/CHANGES.txt index 778afd70a2e..966703f215b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -350,7 +350,7 @@ Optimizations 11. SOLR-1169: SortedIntDocSet - a new small set implementation that saves memory over HashDocSet, is faster to construct, - is ordered for easier impelemntation of skipTo, and is faster + is ordered for easier implementation of skipTo, and is faster in the general case. (yonik) 12. SOLR-1165: Use Lucene Filters and pass them down to the Lucene @@ -367,6 +367,8 @@ Optimizations 16. SOLR-1353: Implement and use reusable token streams for analysis. (yonik) +17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory. Enables + setting termIndexInterval to IndexWriter via SolrIndexConfig. 
(Jason Rutherglen, gsingers) Bug Fixes ---------------------- diff --git a/example/solr/conf/solrconfig.xml b/example/solr/conf/solrconfig.xml index dde1635f334..9ba21d00c45 100755 --- a/example/solr/conf/solrconfig.xml +++ b/example/solr/conf/solrconfig.xml @@ -99,6 +99,10 @@ if not specified.) --> native + + @@ -122,7 +126,12 @@ true - + + + + + + diff --git a/src/java/org/apache/solr/core/IndexReaderFactory.java b/src/java/org/apache/solr/core/IndexReaderFactory.java index 900e7fffc29..25cc477b7ef 100644 --- a/src/java/org/apache/solr/core/IndexReaderFactory.java +++ b/src/java/org/apache/solr/core/IndexReaderFactory.java @@ -27,15 +27,30 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin; * Factory used to build a new IndexReader instance. */ public abstract class IndexReaderFactory implements NamedListInitializedPlugin { - + protected int termInfosIndexDivisor = 1;//IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; Set this once Lucene makes this public. /** + * Potentially initializes {@link #termInfosIndexDivisor}. Overriding classes should call super.init() in order + * to make sure termInfosIndexDivisor is set. + *

* init will be called just once, immediately after creation. *

* The args are user-level initialization parameters that may be specified * when declaring an indexReaderFactory in solrconfig.xml + * */ public void init(NamedList args) { - /* :NOOP: */ + Integer v = (Integer)args.get("termInfosIndexDivisor"); + if (v != null) { + termInfosIndexDivisor = v.intValue(); + } + } + + /** + * + * @return The setting of {@link #termInfosIndexDivisor} + */ + public int getTermInfosIndexDivisor() { + return termInfosIndexDivisor; } /** diff --git a/src/java/org/apache/solr/core/StandardIndexReaderFactory.java b/src/java/org/apache/solr/core/StandardIndexReaderFactory.java index 971f3222987..8d510b43479 100644 --- a/src/java/org/apache/solr/core/StandardIndexReaderFactory.java +++ b/src/java/org/apache/solr/core/StandardIndexReaderFactory.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.store.Directory; +import org.apache.solr.common.util.NamedList; /** * Default IndexReaderFactory implementation. 
Returns a standard Lucene @@ -28,12 +29,12 @@ import org.apache.lucene.store.Directory; * @see IndexReader#open(Directory) */ public class StandardIndexReaderFactory extends IndexReaderFactory { - + /* (non-Javadoc) * @see org.apache.solr.core.IndexReaderFactory#newReader(org.apache.lucene.store.Directory, boolean) */ public IndexReader newReader(Directory indexDir, boolean readOnly) throws IOException { - return IndexReader.open(indexDir, readOnly); + return IndexReader.open(indexDir, null, readOnly, termInfosIndexDivisor); } } diff --git a/src/java/org/apache/solr/update/SolrIndexConfig.java b/src/java/org/apache/solr/update/SolrIndexConfig.java index a2eefc75232..9e6fb86e035 100644 --- a/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -20,6 +20,7 @@ package org.apache.solr.update; import org.apache.solr.core.SolrConfig; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.IndexWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,6 +54,7 @@ public class SolrIndexConfig { mergePolicyClassName = DEFAULT_MERGE_POLICY_CLASSNAME; mergeSchedulerClassname = DEFAULT_MERGE_SCHEDULER_CLASSNAME; luceneAutoCommit = false; + termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL; } public final boolean useCompoundFile; @@ -69,6 +71,7 @@ public class SolrIndexConfig { public final String mergePolicyClassName; public final String mergeSchedulerClassname; public final boolean luceneAutoCommit; + public final int termIndexInterval; public String infoStreamFile = null; @@ -90,6 +93,7 @@ public class SolrIndexConfig { mergePolicyClassName = solrConfig.get(prefix + "/mergePolicy", def.mergePolicyClassName); mergeSchedulerClassname = solrConfig.get(prefix + "/mergeScheduler", def.mergeSchedulerClassname); luceneAutoCommit = solrConfig.getBool(prefix + "/luceneAutoCommit", def.luceneAutoCommit); + 
termIndexInterval = solrConfig.getInt(prefix + "/termIndexInterval", def.termIndexInterval); boolean infoStreamEnabled = solrConfig.getBool(prefix + "/infoStream", false); if(infoStreamEnabled) { diff --git a/src/java/org/apache/solr/update/SolrIndexWriter.java b/src/java/org/apache/solr/update/SolrIndexWriter.java index b40c61a6579..855b77eb7d0 100644 --- a/src/java/org/apache/solr/update/SolrIndexWriter.java +++ b/src/java/org/apache/solr/update/SolrIndexWriter.java @@ -69,6 +69,10 @@ public class SolrIndexWriter extends IndexWriter { if (config.ramBufferSizeMB != -1) { setRAMBufferSizeMB(config.ramBufferSizeMB); } + if (config.termIndexInterval != -1) { + setTermIndexInterval(config.termIndexInterval); + + } if (config.maxMergeDocs != -1) setMaxMergeDocs(config.maxMergeDocs); if (config.maxFieldLength != -1) setMaxFieldLength(config.maxFieldLength); if (config.mergePolicyClassName != null && SolrIndexConfig.DEFAULT_MERGE_POLICY_CLASSNAME.equals(config.mergePolicyClassName) == false) { diff --git a/src/test/org/apache/solr/core/IndexReaderFactoryTest.java b/src/test/org/apache/solr/core/IndexReaderFactoryTest.java new file mode 100644 index 00000000000..2f1e88bebba --- /dev/null +++ b/src/test/org/apache/solr/core/IndexReaderFactoryTest.java @@ -0,0 +1,48 @@ +package org.apache.solr.core; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.store.Directory; +import org.apache.solr.util.AbstractSolrTestCase; + +import java.io.IOException; + +public class IndexReaderFactoryTest extends AbstractSolrTestCase { + + public String getSchemaFile() { + return "schema.xml"; + } + + public String getSolrConfigFile() { + return "solrconfig-termindex.xml"; + } + + /** + * Checks that the core's IndexReaderFactory is a StandardIndexReaderFactory and that it + * reports a termInfosIndexDivisor of 12, the value this test expects from its solrconfig. + * @throws Exception + */ + public void testAltReaderUsed() throws Exception { + IndexReaderFactory readerFactory = h.getCore().getIndexReaderFactory(); + assertNotNull("Factory is null", readerFactory); + assertTrue("readerFactory is not an instanceof " + StandardIndexReaderFactory.class, readerFactory instanceof StandardIndexReaderFactory); + assertEquals("termInfosIndexDivisor not set to 12", 12, readerFactory.getTermInfosIndexDivisor()); + + + } +} \ No newline at end of file diff --git a/src/test/org/apache/solr/core/TestConfig.java b/src/test/org/apache/solr/core/TestConfig.java index c412363e9c9..4824df071c6 100644 --- a/src/test/org/apache/solr/core/TestConfig.java +++ b/src/test/org/apache/solr/core/TestConfig.java @@ -17,18 +17,30 @@ package org.apache.solr.core; +import org.apache.lucene.index.IndexWriter; import org.apache.solr.handler.admin.ShowFileRequestHandler; -import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.solr.search.SolrIndexReader; +import org.apache.solr.search.SolrIndexSearcher; +import
org.apache.solr.update.DirectUpdateHandler2; import org.apache.solr.update.SolrIndexConfig; +import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.solr.util.RefCounted; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import javax.xml.xpath.XPathConstants; +import java.io.IOException; public class TestConfig extends AbstractSolrTestCase { - public String getSchemaFile() { return "schema.xml"; } - public String getSolrConfigFile() { return "solrconfig.xml"; } + public String getSchemaFile() { + return "schema.xml"; + } + + //public String getSolrConfigFile() { return "solrconfig.xml"; } + public String getSolrConfigFile() { + return "solrconfig-termindex.xml"; + } public void testJavaProperty() { // property values defined in build.xml @@ -65,15 +77,44 @@ public class TestConfig extends AbstractSolrTestCase { } // sometime if the config referes to old things, it must be replaced with new stuff - public void testAutomaticDeprecationSupport() - { + public void testAutomaticDeprecationSupport() { // make sure the "admin/file" handler is registered - ShowFileRequestHandler handler = (ShowFileRequestHandler) h.getCore().getRequestHandler( "/admin/file" ); - assertTrue( "file handler should have been automatically registered", handler!=null ); + ShowFileRequestHandler handler = (ShowFileRequestHandler) h.getCore().getRequestHandler("/admin/file"); + assertTrue("file handler should have been automatically registered", handler != null); //System.out.println( handler.getHiddenFiles() ); // should not contain: solrconfig.xml scheam.xml admin-extra.html - assertFalse( handler.getHiddenFiles().contains( "scheam.xml".toUpperCase() ) ); - assertTrue( handler.getHiddenFiles().contains( "PROTWORDS.TXT" ) ); + assertFalse(handler.getHiddenFiles().contains("scheam.xml".toUpperCase())); + assertTrue(handler.getHiddenFiles().contains("PROTWORDS.TXT")); } + + public void testTermIndexInterval() throws Exception { + class ExposeWriterHandler extends 
DirectUpdateHandler2 { + public ExposeWriterHandler() throws IOException { + super(h.getCore()); + } + + public IndexWriter getWriter() throws IOException { + forceOpenWriter(); + return writer; + } + } + + IndexWriter writer = new ExposeWriterHandler().getWriter(); + int interval = writer.getTermIndexInterval(); + assertEquals(256, interval); + } + + public void testTermIndexDivisor() throws Exception { + IndexReaderFactory irf = h.getCore().getIndexReaderFactory(); + StandardIndexReaderFactory sirf = (StandardIndexReaderFactory) irf; + assertEquals(12, sirf.termInfosIndexDivisor); + RefCounted refCounted = h.getCore().getSearcher(); + SolrIndexReader solrReader = refCounted.get().getReader(); + assertEquals(12, solrReader.getTermInfosIndexDivisor()); + } + + } + + diff --git a/src/test/test-files/solr/conf/solrconfig-termindex.xml b/src/test/test-files/solr/conf/solrconfig-termindex.xml new file mode 100644 index 00000000000..65537ea2ed2 --- /dev/null +++ b/src/test/test-files/solr/conf/solrconfig-termindex.xml @@ -0,0 +1,459 @@ + + + + + + + + + + + + ${solr.data.dir:./solr/data} + + + + + false + 10 + + + + 32 + 2147483647 + 10000 + 1000 + 10000 + + + false + + + org.apache.lucene.index.LogByteSizeMergePolicy + + + org.apache.lucene.index.ConcurrentMergeScheduler + + 1000 + 10000 + + 256 + + single + + + + + false + 10 + + 10 + + 256 + + 2147483647 + 10000 + org.apache.lucene.index.LogDocMergePolicy + + true + + + + + + + + + + + + + + + 12 + + + + + 1024 + + + + + + + + + + + true + + + + + + + 10 + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 
recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + 1000 + 1.4142135 + 12 + foo + + + sqrt 2 + log 10 + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + false + + + + + string + elevate.xml + + + + + explicit + + + elevate + + + + + lowerfilt + + + default + lowerfilt + spellchecker1 + true + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + + + + + termsComp + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + + + + + + + + tvComponent + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml scheam.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + false + true + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + +