SOLR-3634: CoreContainer and CoreAdminHandler will now remember and report back information about failures to initialize SolrCores

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1366241 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2012-07-27 00:01:00 +00:00
parent a5f1159a6f
commit a7cd5f3384
5 changed files with 496 additions and 3 deletions

View File

@ -88,6 +88,10 @@ New Features
lib folder. The factories are automatically made available with SPI.
(Chris Male, Robert Muir, Uwe Schindler)
* SOLR-3634: CoreContainer and CoreAdminHandler will now remember and report
back information about failures to initialize SolrCores. These failures will
be accessible from the STATUS command until they are "reset" by
creating/renaming a SolrCore with the same name. (hossman)
Bug Fixes
----------------------

View File

@ -109,6 +109,10 @@ public class CoreContainer
protected final Map<String, SolrCore> cores = new LinkedHashMap<String, SolrCore>();
protected final Map<String,Exception> coreInitFailures =
Collections.synchronizedMap(new LinkedHashMap<String,Exception>());
protected boolean persistent = false;
protected String adminPath = null;
protected String managementPath = null;
@ -676,6 +680,7 @@ public class CoreContainer
throw new IllegalStateException("This CoreContainer has been shutdown");
}
old = cores.put(name, core);
coreInitFailures.remove(name);
/*
* set both the name of the descriptor and the name of the
* core, since the descriptors name is used for persisting.
@ -750,6 +755,44 @@ public class CoreContainer
* @throws org.xml.sax.SAXException
*/
public SolrCore create(CoreDescriptor dcore) throws ParserConfigurationException, IOException, SAXException {
// :TODO: would be really nice if this method wrapped any underlying errors and only threw SolrException
final String name = dcore.getName();
Exception failure = null;
try {
// :nocommit: refactor doCreate completley into this method - only did it this way so patch would be straight forward w/o huge amounts of indenting changes
SolrCore core = doCreate(dcore);
coreInitFailures.remove(name);
return core;
// :TODO: Java7...
// http://docs.oracle.com/javase/7/docs/technotes/guides/language/catch-multiple.html
} catch (ParserConfigurationException e1) {
failure = e1;
throw e1;
} catch (IOException e2) {
failure = e2;
throw e2;
} catch (SAXException e3) {
failure = e3;
throw e3;
} catch (RuntimeException e4) {
failure = e4;
throw e4;
} finally {
if (null != failure) {
synchronized (coreInitFailures) {
// remove first so insertion order is updated and newest is last
coreInitFailures.remove(name);
coreInitFailures.put(name, failure);
}
}
}
}
private SolrCore doCreate(CoreDescriptor dcore) throws ParserConfigurationException, IOException, SAXException {
// Make the instanceDir relative to the cores instanceDir if not absolute
File idir = new File(dcore.getInstanceDir());
if (!idir.isAbsolute()) {
@ -886,6 +929,32 @@ public class CoreContainer
return lst;
}
/**
* Returns an immutable Map of Exceptions that occured when initializing
* SolrCores (either at startup, or do to runtime requests to create cores)
* keyed off of the name (String) of the SolrCore that had the Exception
* during initialization.
* <p>
* While the Map returned by this method is immutable and will not change
* once returned to the client, the source data used to generate this Map
* can be changed as various SolrCore operations are performed:
* </p>
* <ul>
* <li>Failed attempts to create new SolrCores will add new Exceptions.</li>
* <li>Failed attempts to re-create a SolrCore using a name already contained in this Map will replace the Exception.</li>
* <li>Failed attempts to reload a SolrCore will cause an Exception to be added to this list -- even though the existing SolrCore with that name will continue to be available.</li>
* <li>Successful attempts to re-created a SolrCore using a name already contained in this Map will remove the Exception.</li>
* <li>Registering an existing SolrCore with a name already contained in this Map (ie: ALIAS or SWAP) will remove the Exception.</li>
* </ul>
*/
public Map<String,Exception> getCoreInitFailures() {
synchronized ( coreInitFailures ) {
return Collections.unmodifiableMap(new LinkedHashMap<String,Exception>
(coreInitFailures));
}
}
// ---------------- Core name related methods ---------------
/**
* Recreates a SolrCore.
@ -897,8 +966,43 @@ public class CoreContainer
* @throws IOException
* @throws SAXException
*/
public void reload(String name) throws ParserConfigurationException, IOException, SAXException {
// :TODO: would be really nice if this method wrapped any underlying errors and only threw SolrException
Exception failure = null;
try {
// :nocommit: refactor doReload completley into this method - only did it this way so patch would be straight forward w/o huge amounts of indenting changes
doReload(name);
coreInitFailures.remove(name);
return;
// :TODO: Java7...
// http://docs.oracle.com/javase/7/docs/technotes/guides/language/catch-multiple.html
} catch (ParserConfigurationException e1) {
failure = e1;
throw e1;
} catch (IOException e2) {
failure = e2;
throw e2;
} catch (SAXException e3) {
failure = e3;
throw e3;
} catch (RuntimeException e4) {
failure = e4;
throw e4;
} finally {
if (null != failure) {
synchronized (coreInitFailures) {
// remove first so insertion order is updated and newest is last
coreInitFailures.remove(name);
coreInitFailures.put(name, failure);
}
}
}
}
private void doReload(String name) throws ParserConfigurationException, IOException, SAXException {
name= checkDefault(name);
SolrCore core;
synchronized(cores) {

View File

@ -19,11 +19,13 @@ package org.apache.solr.handler.admin;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Collections;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.index.DirectoryReader;
@ -567,13 +569,19 @@ public class CoreAdminHandler extends RequestHandlerBase {
String cname = params.get(CoreAdminParams.CORE);
boolean doPersist = false;
NamedList<Object> status = new SimpleOrderedMap<Object>();
Map<String,Exception> allFailures = coreContainer.getCoreInitFailures();
try {
if (cname == null) {
rsp.add("defaultCoreName", coreContainer.getDefaultCoreName());
for (String name : coreContainer.getCoreNames()) {
status.add(name, getCoreStatus(coreContainer, name));
}
rsp.add("initFailures", allFailures);
} else {
Map failures = allFailures.containsKey(cname)
? Collections.singletonMap(cname, allFailures.get(cname))
: Collections.emptyMap();
rsp.add("initFailures", failures);
status.add(cname, getCoreStatus(coreContainer, cname));
}
rsp.add("status", status);

View File

@ -0,0 +1,331 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import java.util.Map;
import java.util.Collection;
import java.util.regex.Pattern;
import java.io.File;
import org.apache.solr.common.SolrException;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.lucene.util.IOUtils;
import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.junit.After;
public class CoreContainerCoreInitFailuresTest extends SolrTestCaseJ4 {
File solrHome = null;
CoreContainer cc = null;
private void init(final String dirSuffix) {
// would be nice to do this in an @Before method,
// but junit doesn't let @Before methods have test names
solrHome = new File(TEMP_DIR, this.getClass().getName() + "_" + dirSuffix);
assertTrue("Failed to mkdirs solrhome", solrHome.mkdirs());
cc = new CoreContainer(solrHome.getAbsolutePath());
}
@After
public void cleanUp() throws Exception {
if (cc != null) {
cc.shutdown();
cc = null;
}
if (null != solrHome) {
if (solrHome.exists()) {
FileUtils.deleteDirectory(solrHome);
}
solrHome = null;
}
}
public void testFlowWithEmpty() throws Exception {
// reused state
Map<String,Exception> failures = null;
Collection<String> cores = null;
Exception fail = null;
init("empty_flow");
// solr.xml
File solrXml = new File(solrHome, "solr.xml");
FileUtils.write(solrXml, EMPTY_SOLR_XML, IOUtils.CHARSET_UTF_8.toString());
// ----
// init the CoreContainer
cc.load(solrHome.getAbsolutePath(), solrXml);
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 0, cores.size());
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 0, failures.size());
// -----
// try to add a collection with a path that doesn't exist
final CoreDescriptor bogus = new CoreDescriptor(cc, "bogus", "bogus_path");
try {
cc.create(bogus);
fail("bogus inst dir failed to trigger exception from create");
} catch (Exception e) {
// :TODO: should really tighten up the exceptions CoreContainer throws (ie: just SolrException)
assertTrue("init exception doesn't mention bogus dir: " + e.getMessage(),
0 < e.getMessage().indexOf("bogus_path"));
}
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 0, cores.size());
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 1, failures.size());
fail = failures.get("bogus");
assertNotNull("null failure for test core", fail);
assertTrue("init failure doesn't mention problem: " + fail.getMessage(),
0 < fail.getMessage().indexOf("bogus_path"));
// let the test end here, with some recorded failures, and let cleanUp()
// verify that there is no problem shuting down CoreContainer with known
// SolrCore failures
}
public void testFlowBadFromStart() throws Exception {
// reused state
Map<String,Exception> failures = null;
Collection<String> cores = null;
Exception fail = null;
init("bad_flow");
// start with two collections: one valid, and one broken
File solrXml = new File(solrHome, "solr.xml");
FileUtils.write(solrXml, BAD_SOLR_XML, IOUtils.CHARSET_UTF_8.toString());
// our "ok" collection
FileUtils.copyFile(getFile("solr/collection1/conf/solrconfig-basic.xml"),
FileUtils.getFile(solrHome, "col_ok", "conf", "solrconfig.xml"));
FileUtils.copyFile(getFile("solr/collection1/conf/schema-minimal.xml"),
FileUtils.getFile(solrHome, "col_ok", "conf", "schema.xml"));
// our "bad" collection
ignoreException(Pattern.quote("DummyMergePolicy"));
FileUtils.copyFile(getFile("solr/collection1/conf/bad-mp-solrconfig.xml"),
FileUtils.getFile(solrHome, "col_bad", "conf", "solrconfig.xml"));
FileUtils.copyFile(getFile("solr/collection1/conf/schema-minimal.xml"),
FileUtils.getFile(solrHome, "col_bad", "conf", "schema.xml"));
// -----
// init the CoreContainer with the mix of ok/bad cores
cc.load(solrHome.getAbsolutePath(), solrXml);
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 1, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 1, failures.size());
fail = failures.get("col_bad");
assertNotNull("null failure for test core", fail);
assertTrue("init failure doesn't mention problem: " + fail.getMessage(),
0 < fail.getMessage().indexOf("DummyMergePolicy"));
// -----
// "fix" the bad collection
FileUtils.copyFile(getFile("solr/collection1/conf/solrconfig-basic.xml"),
FileUtils.getFile(solrHome, "col_bad", "conf", "solrconfig.xml"));
final CoreDescriptor fixed = new CoreDescriptor(cc, "col_bad", "col_bad");
cc.register("col_bad", cc.create(fixed), false);
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 2, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
assertTrue("col_bad not found", cores.contains("col_bad"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 0, failures.size());
// -----
// try to add a collection with a path that doesn't exist
final CoreDescriptor bogus = new CoreDescriptor(cc, "bogus", "bogus_path");
try {
cc.create(bogus);
fail("bogus inst dir failed to trigger exception from create");
} catch (Exception e) {
// :TODO: should really tighten up the exceptions CoreContainer throws (ie: just SolrException)
assertTrue("init exception doesn't mention bogus dir: " + e.getMessage(),
0 < e.getMessage().indexOf("bogus_path"));
}
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 2, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
assertTrue("col_bad not found", cores.contains("col_bad"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 1, failures.size());
fail = failures.get("bogus");
assertNotNull("null failure for test core", fail);
assertTrue("init failure doesn't mention problem: " + fail.getMessage(),
0 < fail.getMessage().indexOf("bogus_path"));
// -----
// register bogus as an alias for col_ok and confirm failure goes away
cc.register("bogus", cc.getCore("col_ok"), false);
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 3, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
assertTrue("col_bad not found", cores.contains("col_bad"));
assertTrue("bogus not found", cores.contains("bogus"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 0, failures.size());
// -----
// break col_bad's config and try to RELOAD to add failure
final long col_bad_old_start = getCoreStartTime(cc, "col_bad");
FileUtils.write
(FileUtils.getFile(solrHome, "col_bad", "conf", "solrconfig.xml"),
"This is giberish, not valid XML <",
IOUtils.CHARSET_UTF_8.toString());
try {
cc.reload("col_bad");
fail("corrupd solrconfig.xml failed to trigger exception from reload");
} catch (Exception e) {
// :TODO: should really tighten up the exceptions CoreContainer throws (ie: just SolrException)
assertTrue("reload exception doesn't mention bad prolog: " + e.getMessage(),
0 < e.getMessage().indexOf("prolog"));
}
assertEquals("Failed core reload should not have changed start time",
col_bad_old_start, getCoreStartTime(cc, "col_bad"));
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 3, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
assertTrue("col_bad not found", cores.contains("col_bad"));
assertTrue("bogus not found", cores.contains("bogus"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 1, failures.size());
fail = failures.get("col_bad");
assertNotNull("null failure for test core", fail);
assertTrue("init failure doesn't mention problem: " + fail.getMessage(),
0 < fail.getMessage().indexOf("prolog"));
// ----
// fix col_bad's config (again) and RELOAD to fix failure
FileUtils.copyFile(getFile("solr/collection1/conf/solrconfig-basic.xml"),
FileUtils.getFile(solrHome, "col_bad", "conf", "solrconfig.xml"));
cc.reload("col_bad");
assertTrue("Core reload should have changed start time",
col_bad_old_start < getCoreStartTime(cc, "col_bad"));
// check that we have the cores we expect
cores = cc.getCoreNames();
assertNotNull("core names is null", cores);
assertEquals("wrong number of cores", 3, cores.size());
assertTrue("col_ok not found", cores.contains("col_ok"));
assertTrue("col_bad not found", cores.contains("col_bad"));
assertTrue("bogus not found", cores.contains("bogus"));
// check that we have the failures we expect
failures = cc.getCoreInitFailures();
assertNotNull("core failures is a null map", failures);
assertEquals("wrong number of core failures", 0, failures.size());
}
private final long getCoreStartTime(final CoreContainer cc,
final String name) {
SolrCore tmp = cc.getCore(name);
try {
return tmp.getStartTime();
} finally {
tmp.close();
}
}
private static final String EMPTY_SOLR_XML ="<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
"<solr persistent=\"false\">\n" +
" <cores adminPath=\"/admin/cores\">\n" +
" </cores>\n" +
"</solr>";
private static final String BAD_SOLR_XML =
"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
"<solr persistent=\"false\">\n" +
" <cores adminPath=\"/admin/cores\">\n" +
" <core name=\"col_ok\" instanceDir=\"col_ok\" />\n" +
" <core name=\"col_bad\" instanceDir=\"col_bad\" />\n" +
" </cores>\n" +
"</solr>";
}

View File

@ -20,16 +20,20 @@ package org.apache.solr.handler.admin;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.admin.CoreAdminHandler;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.SolrTestCaseJ4;
import java.util.Map;
import java.io.File;
import java.io.IOException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.io.FileUtils;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
import org.xml.sax.SAXException;
@ -94,7 +98,49 @@ public class CoreAdminHandlerTest extends SolrTestCaseJ4 {
,"/solr/cores/core[@name='props']/property[@name='hoss' and @value='man']"
,"/solr/cores/core[@name='props']/property[@name='foo' and @value='baz']"
);
// attempt to create a bogus core and confirm failure
try {
resp = new SolrQueryResponse();
admin.handleRequestBody
(req(CoreAdminParams.ACTION,
CoreAdminParams.CoreAdminAction.CREATE.toString(),
CoreAdminParams.NAME, "bogus_dir_core",
CoreAdminParams.INSTANCE_DIR, "dir_does_not_exist_127896"),
resp);
fail("bogus collection created ok");
} catch (SolrException e) {
// :NOOP:
// :TODO: CoreAdminHandler's exception messages are terrible, otherwise we could asert something useful here
}
// check specificly for status of the failed core name
resp = new SolrQueryResponse();
admin.handleRequestBody
(req(CoreAdminParams.ACTION,
CoreAdminParams.CoreAdminAction.STATUS.toString(),
CoreAdminParams.CORE, "bogus_dir_core"),
resp);
Map<String,Exception> failures =
(Map<String,Exception>) resp.getValues().get("initFailures");
assertNotNull("core failures is null", failures);
NamedList<Object> status =
(NamedList<Object>)resp.getValues().get("status");
assertNotNull("core status is null", status);
assertEquals("wrong number of core failures", 1, failures.size());
Exception fail = failures.get("bogus_dir_core");
assertNotNull("null failure for test core", fail);
assertTrue("init failure doesn't mention problem: " + fail.getMessage(),
0 < fail.getMessage().indexOf("dir_does_not_exist"));
assertEquals("bogus_dir_core status isn't empty",
0, ((NamedList)status.get("bogus_dir_core")).size());
// :TODO: because of SOLR-3665 we can't ask for status from all cores
}