From 77a4e2fa4f081de0245afc0c90527c7c34c01de2 Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Thu, 26 Jun 2008 21:14:06 +0000 Subject: [PATCH] SOLR-603: Add partial optimize capabilities and deprecate DirectUpdateHandler git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@672031 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../apache/solr/client/solrj/SolrServer.java | 8 +- .../client/solrj/request/UpdateRequest.java | 6 + .../solr/common/params/UpdateParams.java | 4 + .../solr/handler/RequestHandlerUtils.java | 2 + .../solr/handler/XmlUpdateRequestHandler.java | 9 +- .../solr/update/CommitUpdateCommand.java | 7 + .../solr/update/DirectUpdateHandler.java | 4 +- .../solr/update/DirectUpdateHandler2.java | 9 +- .../DirectUpdateHandlerOptimizeTest.java | 94 ++++ .../solr/conf/solrconfig-duh-optimize.xml | 413 ++++++++++++++++++ 11 files changed, 548 insertions(+), 10 deletions(-) create mode 100644 src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java create mode 100644 src/test/test-files/solr/conf/solrconfig-duh-optimize.xml diff --git a/CHANGES.txt b/CHANGES.txt index 012d62b80e3..45d7928d837 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -298,6 +298,8 @@ New Features (Shalin Shekhar Mangar, Bojan Smid, gsingers) 54. SOLR-423: Added Request Handler close hook notification so that RequestHandlers can be notified when a core is closing. (gsingers, ryan) + +55. SOLR-603: Added ability to partially optimize. (gsingers) Changes in runtime behavior 1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. 
This diff --git a/client/java/solrj/src/org/apache/solr/client/solrj/SolrServer.java b/client/java/solrj/src/org/apache/solr/client/solrj/SolrServer.java index fb8fe6d90a2..425b1d21411 100644 --- a/client/java/solrj/src/org/apache/solr/client/solrj/SolrServer.java +++ b/client/java/solrj/src/org/apache/solr/client/solrj/SolrServer.java @@ -95,7 +95,7 @@ public abstract class SolrServer implements Serializable * @throws IOException */ public UpdateResponse optimize( ) throws SolrServerException, IOException { - return optimize(true, true); + return optimize(true, true, 1); } public UpdateResponse commit( boolean waitFlush, boolean waitSearcher ) throws SolrServerException, IOException { @@ -103,7 +103,11 @@ public abstract class SolrServer implements Serializable } public UpdateResponse optimize( boolean waitFlush, boolean waitSearcher ) throws SolrServerException, IOException { - return new UpdateRequest().setAction( UpdateRequest.ACTION.OPTIMIZE, waitFlush, waitSearcher ).process( this ); + return optimize(waitFlush, waitSearcher, 1); + } + + public UpdateResponse optimize(boolean waitFlush, boolean waitSearcher, int maxSegments ) throws SolrServerException, IOException { + return new UpdateRequest().setAction( UpdateRequest.ACTION.OPTIMIZE, waitFlush, waitSearcher, maxSegments ).process( this ); } public UpdateResponse deleteById(String id) throws SolrServerException, IOException { diff --git a/client/java/solrj/src/org/apache/solr/client/solrj/request/UpdateRequest.java b/client/java/solrj/src/org/apache/solr/client/solrj/request/UpdateRequest.java index d13acfabbce..e7b224492ed 100644 --- a/client/java/solrj/src/org/apache/solr/client/solrj/request/UpdateRequest.java +++ b/client/java/solrj/src/org/apache/solr/client/solrj/request/UpdateRequest.java @@ -127,11 +127,16 @@ public class UpdateRequest extends SolrRequest /** Sets appropriate parameters for the given ACTION */ public UpdateRequest setAction(ACTION action, boolean waitFlush, boolean waitSearcher ) { 
+ return setAction(action, waitFlush, waitSearcher, 1); + } + + public UpdateRequest setAction(ACTION action, boolean waitFlush, boolean waitSearcher, int maxSegments ) { if (params == null) params = new ModifiableSolrParams(); if( action == ACTION.OPTIMIZE ) { params.set( UpdateParams.OPTIMIZE, "true" ); + params.set(UpdateParams.MAX_OPTIMIZE_SEGMENTS, maxSegments); } else if( action == ACTION.COMMIT ) { params.set( UpdateParams.COMMIT, "true" ); @@ -140,6 +145,7 @@ public class UpdateRequest extends SolrRequest params.set( UpdateParams.WAIT_SEARCHER, waitSearcher+"" ); return this; } + public void setParam(String param, String value) { if (params == null) diff --git a/src/java/org/apache/solr/common/params/UpdateParams.java b/src/java/org/apache/solr/common/params/UpdateParams.java index 50fa1931e3a..918767d6006 100644 --- a/src/java/org/apache/solr/common/params/UpdateParams.java +++ b/src/java/org/apache/solr/common/params/UpdateParams.java @@ -42,4 +42,8 @@ public interface UpdateParams /** Select the update processor to use. A RequestHandler may or may not respect this parameter */ public static final String UPDATE_PROCESSOR = "update.processor"; + /** + * If optimizing, set the maximum number of segments left in the index after optimization. 1 is the default (and is equivalent to calling IndexWriter.optimize() in Lucene). 
+ */ + public static final String MAX_OPTIMIZE_SEGMENTS = "maxSegments"; } diff --git a/src/java/org/apache/solr/handler/RequestHandlerUtils.java b/src/java/org/apache/solr/handler/RequestHandlerUtils.java index 8492234137f..ebc83dbef1c 100755 --- a/src/java/org/apache/solr/handler/RequestHandlerUtils.java +++ b/src/java/org/apache/solr/handler/RequestHandlerUtils.java @@ -67,6 +67,7 @@ public class RequestHandlerUtils CommitUpdateCommand cmd = new CommitUpdateCommand( optimize ); cmd.waitFlush = params.getBool( UpdateParams.WAIT_FLUSH, cmd.waitFlush ); cmd.waitSearcher = params.getBool( UpdateParams.WAIT_SEARCHER, cmd.waitSearcher ); + cmd.maxOptimizeSegments = params.getInt(UpdateParams.MAX_OPTIMIZE_SEGMENTS, cmd.maxOptimizeSegments); req.getCore().getUpdateHandler().commit( cmd ); // Lets wait till after solr1.2 to define consistent output format @@ -99,6 +100,7 @@ public class RequestHandlerUtils CommitUpdateCommand cmd = new CommitUpdateCommand( optimize ); cmd.waitFlush = params.getBool( UpdateParams.WAIT_FLUSH, cmd.waitFlush ); cmd.waitSearcher = params.getBool( UpdateParams.WAIT_SEARCHER, cmd.waitSearcher ); + cmd.maxOptimizeSegments = params.getInt(UpdateParams.MAX_OPTIMIZE_SEGMENTS, cmd.maxOptimizeSegments); processor.processCommit( cmd ); return true; } diff --git a/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java b/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java index 2f8026c32cf..93f9a8d4320 100644 --- a/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java +++ b/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java @@ -20,6 +20,7 @@ package org.apache.solr.handler; import java.io.IOException; import java.io.Reader; import java.io.Writer; +import java.io.File; import java.util.HashMap; import java.util.logging.Logger; @@ -75,7 +76,8 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase public static final String ALLOW_DUPS = "allowDups"; XMLInputFactory inputFactory; - + + @Override public void 
init(NamedList args) { @@ -210,7 +212,10 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase } else if (WAIT_SEARCHER.equals(attrName)) { cmd.waitSearcher = StrUtils.parseBoolean(attrVal); sawWaitSearcher = true; - } else { + } else if (UpdateParams.MAX_OPTIMIZE_SEGMENTS.equals(attrName)){ + cmd.maxOptimizeSegments = Integer.parseInt(attrVal); + } + else { log.warning("unexpected attribute commit/@" + attrName); } } diff --git a/src/java/org/apache/solr/update/CommitUpdateCommand.java b/src/java/org/apache/solr/update/CommitUpdateCommand.java index 2f47da79490..171efc409f6 100644 --- a/src/java/org/apache/solr/update/CommitUpdateCommand.java +++ b/src/java/org/apache/solr/update/CommitUpdateCommand.java @@ -24,6 +24,13 @@ public class CommitUpdateCommand extends UpdateCommand { public boolean waitFlush; public boolean waitSearcher=true; + /** + * During optimize, optimize down to <= this many segments. Must be >= 1 + * + * @see org.apache.lucene.index.IndexWriter#optimize(int) + */ + public int maxOptimizeSegments = 1; + public CommitUpdateCommand(boolean optimize) { super("commit"); this.optimize=optimize; diff --git a/src/java/org/apache/solr/update/DirectUpdateHandler.java b/src/java/org/apache/solr/update/DirectUpdateHandler.java index 824272eeb8e..6744d617feb 100644 --- a/src/java/org/apache/solr/update/DirectUpdateHandler.java +++ b/src/java/org/apache/solr/update/DirectUpdateHandler.java @@ -49,6 +49,8 @@ import org.apache.solr.core.SolrCore; * * @version $Id$ * @since solr 0.9 + * + * @deprecated Use {@link DirectUpdateHandler2} instead. This is only kept around for back-compatibility (way back). 
*/ public class DirectUpdateHandler extends UpdateHandler { @@ -236,7 +238,7 @@ public class DirectUpdateHandler extends UpdateHandler { closeSearcher(); // flush any deletes if (cmd.optimize) { openWriter(); // writer needs to be open to optimize - writer.optimize(); + writer.optimize(cmd.maxOptimizeSegments); } closeWriter(); diff --git a/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/src/java/org/apache/solr/update/DirectUpdateHandler2.java index 6ff321240df..2c5170a94b6 100644 --- a/src/java/org/apache/solr/update/DirectUpdateHandler2.java +++ b/src/java/org/apache/solr/update/DirectUpdateHandler2.java @@ -360,8 +360,8 @@ public class DirectUpdateHandler2 extends UpdateHandler { if (cmd.optimize) { closeSearcher(); - openWriter(); - writer.optimize(); + openWriter(); + writer.optimize(cmd.maxOptimizeSegments); } closeSearcher(); @@ -402,8 +402,6 @@ public class DirectUpdateHandler2 extends UpdateHandler { SolrException.log(log,e); } } - - return; } @@ -521,7 +519,8 @@ public class DirectUpdateHandler2 extends UpdateHandler { try { CommitUpdateCommand command = new CommitUpdateCommand( false ); command.waitFlush = true; - command.waitSearcher = true; + command.waitSearcher = true; + //no need for command.maxOptimizeSegments = 1; since it is not optimizing commit( command ); autoCommitCount++; } diff --git a/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java b/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java new file mode 100644 index 00000000000..aba696edc2d --- /dev/null +++ b/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java @@ -0,0 +1,94 @@ +package org.apache.solr.update; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.solr.core.SolrCore; +import org.apache.solr.util.AbstractSolrTestCase; + +import java.io.File; +import java.io.FileFilter; + + +/** + * + * + **/ +public class DirectUpdateHandlerOptimizeTest extends AbstractSolrTestCase { + + public String getSchemaFile() { + return "schema.xml"; + } + + public String getSolrConfigFile() { + return "solrconfig-duh-optimize.xml"; + } + + + public void testOptimize() throws Exception { + SolrCore core = h.getCore(); + + UpdateHandler updater = core.getUpdateHandler(); + AddUpdateCommand cmd = new AddUpdateCommand(); + cmd.overwriteCommitted = true; + cmd.overwritePending = true; + cmd.allowDups = false; + //add just under the merge factor, so no segments are merged + //the merge factor is 1000 and the maxBufferedDocs is 2, so there should be 500 segments (499 segs each w/ 2 docs, and 1 segment with 1 doc) + for (int i = 0; i < 999; i++) { + // Add a valid document + cmd.doc = new Document(); + cmd.doc.add(new Field("id", "id_" + i, Field.Store.YES, Field.Index.UN_TOKENIZED)); + cmd.doc.add(new Field("subject", "subject_" + i, Field.Store.NO, Field.Index.TOKENIZED)); + updater.addDoc(cmd); + } + + CommitUpdateCommand cmtCmd = new CommitUpdateCommand(false); + updater.commit(cmtCmd); + + String indexDir = core.getIndexDir(); + 
assertNumSegments(indexDir, 500); + + //now do an optimize + cmtCmd = new CommitUpdateCommand(true); + cmtCmd.maxOptimizeSegments = 250; + updater.commit(cmtCmd); + assertNumSegments(indexDir, 250); + + cmtCmd.maxOptimizeSegments = -1; + try { + updater.commit(cmtCmd); + assertTrue(false); + } catch (IllegalArgumentException e) { + } + cmtCmd.maxOptimizeSegments = 1; + updater.commit(cmtCmd); + assertNumSegments(indexDir, 1); + } + + private void assertNumSegments(String indexDir, int numSegs) { + File file = new File(indexDir); + File[] segs = file.listFiles(new FileFilter() { + public boolean accept(File file) { + return file.getName().endsWith("tii"); + } + }); + assertTrue("Wrong number of segments: " + segs.length + " does not equal: " + numSegs, segs.length == numSegs); + } + +} diff --git a/src/test/test-files/solr/conf/solrconfig-duh-optimize.xml b/src/test/test-files/solr/conf/solrconfig-duh-optimize.xml new file mode 100644 index 00000000000..d5fa26e3128 --- /dev/null +++ b/src/test/test-files/solr/conf/solrconfig-duh-optimize.xml @@ -0,0 +1,413 @@ + + + + + + + + + + ${solr.data.dir:./solr/data} + + + + + false + + 1000 + 2 + + + + + 2147483647 + 10000 + 1000 + 10000 + + + false + + + org.apache.lucene.index.LogByteSizeMergePolicy + + + org.apache.lucene.index.ConcurrentMergeScheduler + + 1000 + 10000 + + single + + + + + false + 1000 + 2 + 2147483647 + 10000 + + true + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + true + + + + + true + + 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + 
+ + 1000 + 1.4142135 + 12 + foo + + + sqrt 2 + log 10 + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + string + elevate.xml + + + + + explicit + + + elevate + + + + + + + false + + false + + 1 + + lowerfilt + + + default + lowerfilt + ./spellchecker + + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + ./spellchecker + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + ./spellchecker + + + + + + + + spellcheck + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml scheam.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + +