SOLR-5729: Merge in bug fixes and minor improvements for mapreduce and morphlines contribs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1569794 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2014-02-19 15:46:24 +00:00
parent 86ee00c352
commit c4b4582dda
31 changed files with 417 additions and 199 deletions

View File

@ -191,7 +191,7 @@ org.gagravarr.vorbis.java.version = 0.1
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
org.kitesdk.kite-morphlines.version = 0.10.0
org.kitesdk.kite-morphlines.version = 0.11.0
/org.kitesdk/kite-morphlines-avro = ${org.kitesdk.kite-morphlines.version}
/org.kitesdk/kite-morphlines-core = ${org.kitesdk.kite-morphlines.version}
/org.kitesdk/kite-morphlines-hadoop-sequencefile = ${org.kitesdk.kite-morphlines.version}

View File

@ -56,6 +56,30 @@ Other Changes
* SOLR-4792: Stop shipping a .war. (Robert Muir)
================== 4.8.0 ==================
Versions of Major Components
---------------------
Apache Tika 1.4
Carrot2 3.8.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5
Detailed Change List
----------------------
New Features
----------------------
Bug Fixes
----------------------
* SOLR-5729: Merge in bug fixes and minor improvements for mapreduce and
morphlines contribs. (Patrick Hunt via Mark Miller)
================== 4.7.0 ==================
Versions of Major Components

View File

@ -18,7 +18,6 @@ package org.apache.solr.morphlines.cell;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -45,15 +44,12 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.TeeContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
@ -64,6 +60,9 @@ import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Configs;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.stdio.AbstractParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
@ -102,6 +101,7 @@ public final class SolrCellBuilder implements CommandBuilder {
private final String xpathExpr;
private final List<Parser> parsers = new ArrayList();
private final SolrContentHandlerFactory solrContentHandlerFactory;
private final Locale locale;
private final SolrParams solrParams;
private final Map<MediaType, Parser> mediaTypeToParserMap;
@ -163,6 +163,8 @@ public final class SolrCellBuilder implements CommandBuilder {
}
this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config);
this.locale = getLocale(getConfigs().getString(config, "locale", ""));
this.mediaTypeToParserMap = new HashMap<MediaType, Parser>();
//MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize()
@ -222,6 +224,7 @@ public final class SolrCellBuilder implements CommandBuilder {
}
ParseContext parseContext = new ParseContext();
parseContext.set(Locale.class, locale);
Metadata metadata = new Metadata();
for (Entry<String, Object> entry : record.getFields().entries()) {
@ -233,12 +236,6 @@ public final class SolrCellBuilder implements CommandBuilder {
inputStream = TikaInputStream.get(inputStream);
ContentHandler parsingHandler = handler;
StringWriter debugWriter = null;
if (LOG.isTraceEnabled()) {
debugWriter = new StringWriter();
ContentHandler serializer = new XMLSerializer(debugWriter, new OutputFormat("XML", "UTF-8", true));
parsingHandler = new TeeContentHandler(parsingHandler, serializer);
}
// String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()";
if (xpathExpr != null) {
@ -255,8 +252,6 @@ public final class SolrCellBuilder implements CommandBuilder {
} catch (TikaException e) {
throw new MorphlineRuntimeException("Cannot parse", e);
}
LOG.trace("debug XML doc: {}", debugWriter);
} finally {
if (inputStream != null) {
Closeables.closeQuietly(inputStream);
@ -336,6 +331,18 @@ public final class SolrCellBuilder implements CommandBuilder {
return record;
}
/**
 * Resolves a locale identifier to a {@link Locale} instance.
 * <p>
 * The identifier is matched against {@link Locale#toString()} of every locale
 * returned by {@link Locale#getAvailableLocales()}. If none matches and the
 * identifier equals the string form of {@link Locale#ROOT} (the empty string),
 * the root locale is returned.
 *
 * @param name the locale identifier to look up, e.g. as produced by {@code Locale.toString()}
 * @return the matching locale, or {@link Locale#ROOT} for the empty identifier
 * @throws MorphlineCompilationException if the identifier matches no known locale
 */
private Locale getLocale(String name) {
  final Locale[] candidates = Locale.getAvailableLocales();
  for (int i = 0; i < candidates.length; i++) {
    final Locale candidate = candidates[i];
    if (candidate.toString().equals(name)) {
      return candidate;
    }
  }

  // The root locale is identified by the empty string; it is handled
  // explicitly in case it was not matched by the scan above.
  final String rootName = Locale.ROOT.toString();
  assert rootName.equals("");
  if (!name.equals(rootName)) {
    throw new MorphlineCompilationException("Unknown locale: " + name, getConfig());
  }
  return Locale.ROOT;
}
}
}

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -50,23 +50,23 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
public void setUp() throws Exception {
super.setUp();
String path = RESOURCES_DIR + "/test-documents";
expectedRecords.put(path + "/sample-statuses-20120906-141433.avro", 2);
expectedRecords.put(path + "/sample-statuses-20120906-141433", 2);
expectedRecords.put(path + "/sample-statuses-20120906-141433.gz", 2);
expectedRecords.put(path + "/sample-statuses-20120906-141433.bz2", 2);
expectedRecords.put(path + "/cars.csv", 6);
expectedRecords.put(path + "/cars.csv.gz", 6);
expectedRecords.put(path + "/cars.tar.gz", 4);
expectedRecords.put(path + "/cars.tsv", 6);
expectedRecords.put(path + "/cars.ssv", 6);
expectedRecords.put(path + "/test-documents.7z", 9);
expectedRecords.put(path + "/test-documents.cpio", 9);
expectedRecords.put(path + "/test-documents.tar", 9);
expectedRecords.put(path + "/test-documents.tbz2", 9);
expectedRecords.put(path + "/test-documents.tgz", 9);
expectedRecords.put(path + "/test-documents.zip", 9);
expectedRecords.put(path + "/multiline-stacktrace.log", 4);
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
expectedRecords.put(path + "sample-statuses-20120906-141433.avro", 2);
expectedRecords.put(path + "sample-statuses-20120906-141433", 2);
expectedRecords.put(path + "sample-statuses-20120906-141433.gz", 2);
expectedRecords.put(path + "sample-statuses-20120906-141433.bz2", 2);
expectedRecords.put(path + "cars.csv", 6);
expectedRecords.put(path + "cars.csv.gz", 6);
expectedRecords.put(path + "cars.tar.gz", 4);
expectedRecords.put(path + "cars.tsv", 6);
expectedRecords.put(path + "cars.ssv", 6);
expectedRecords.put(path + "test-documents.7z", 9);
expectedRecords.put(path + "test-documents.cpio", 9);
expectedRecords.put(path + "test-documents.tar", 9);
expectedRecords.put(path + "test-documents.tbz2", 9);
expectedRecords.put(path + "test-documents.tgz", 9);
expectedRecords.put(path + "test-documents.zip", 9);
expectedRecords.put(path + "multiline-stacktrace.log", 4);
{
Map<String, Object> record = new LinkedHashMap();
@ -81,7 +81,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
}
{
String file = path + "/testWORD_various.doc";
String file = path + "testWORD_various.doc";
Map<String, Object> record = new LinkedHashMap();
record.put("ignored__attachment_mimetype", "application/msword");
record.put("ignored_author", "Michael McCandless");
@ -94,7 +94,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
}
{
String file = path + "/testPDF.pdf";
String file = path + "testPDF.pdf";
Map<String, Object> record = new LinkedHashMap();
record.put("ignored__attachment_mimetype", "application/pdf");
record.put("ignored_author", "Bertrand Delacrétaz");
@ -106,7 +106,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
}
{
String file = path + "/email.eml";
String file = path + "email.eml";
Map<String, Object> record = new LinkedHashMap();
String name = "Patrick Foo <foo@cloudera.com>";
record.put("ignored__attachment_mimetype", "message/rfc822");
@ -127,7 +127,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
}
{
String file = path + "/testEXCEL.xlsx";
String file = path + "testEXCEL.xlsx";
Map<String, Object> record = new LinkedHashMap();
record.put("ignored__attachment_mimetype", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
record.put("ignored_author", "Keith Bennett");
@ -142,23 +142,23 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
@Test
public void testSolrCellJPGCompressed() throws Exception {
morphline = createMorphline("test-morphlines/solrCellJPGCompressed");
String path = RESOURCES_DIR + "/test-documents";
morphline = createMorphline("test-morphlines" + File.separator + "solrCellJPGCompressed");
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
String[] files = new String[] {
path + "/testJPEG_EXIF.jpg",
path + "/testJPEG_EXIF.jpg.gz",
path + "/testJPEG_EXIF.jpg.tar.gz",
//path + "/jpeg2000.jp2",
path + "testJPEG_EXIF.jpg",
path + "testJPEG_EXIF.jpg.gz",
path + "testJPEG_EXIF.jpg.tar.gz",
//path + "jpeg2000.jp2",
};
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
@Test
public void testSolrCellXML() throws Exception {
morphline = createMorphline("test-morphlines/solrCellXML");
String path = RESOURCES_DIR + "/test-documents";
morphline = createMorphline("test-morphlines" + File.separator + "solrCellXML");
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
String[] files = new String[] {
path + "/testXML2.xml",
path + "testXML2.xml",
};
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
@ -168,27 +168,27 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
String path = RESOURCES_DIR + "/test-documents";
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
String[] files = new String[] {
path + "/testBMPfp.txt",
path + "/boilerplate.html",
path + "/NullHeader.docx",
path + "/testWORD_various.doc",
path + "/testPDF.pdf",
path + "/testJPEG_EXIF.jpg",
path + "/testJPEG_EXIF.jpg.gz",
path + "/testJPEG_EXIF.jpg.tar.gz",
path + "/testXML.xml",
path + "/cars.csv",
// path + "/cars.tsv",
// path + "/cars.ssv",
path + "/cars.csv.gz",
path + "/cars.tar.gz",
path + "/sample-statuses-20120906-141433.avro",
path + "/sample-statuses-20120906-141433",
path + "/sample-statuses-20120906-141433.gz",
path + "/sample-statuses-20120906-141433.bz2",
path + "/email.eml",
path + "testBMPfp.txt",
path + "boilerplate.html",
path + "NullHeader.docx",
path + "testWORD_various.doc",
path + "testPDF.pdf",
path + "testJPEG_EXIF.jpg",
path + "testJPEG_EXIF.jpg.gz",
path + "testJPEG_EXIF.jpg.tar.gz",
path + "testXML.xml",
path + "cars.csv",
// path + "cars.tsv",
// path + "cars.ssv",
path + "cars.csv.gz",
path + "cars.tar.gz",
path + "sample-statuses-20120906-141433.avro",
path + "sample-statuses-20120906-141433",
path + "sample-statuses-20120906-141433.gz",
path + "sample-statuses-20120906-141433.bz2",
path + "email.eml",
};
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
@ -199,58 +199,58 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
String path = RESOURCES_DIR + "/test-documents";
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
String[] files = new String[] {
path + "/testPPT_various.ppt",
path + "/testPPT_various.pptx",
path + "/testEXCEL.xlsx",
path + "/testEXCEL.xls",
path + "/testPages.pages",
//path + "/testNumbers.numbers",
//path + "/testKeynote.key",
path + "testPPT_various.ppt",
path + "testPPT_various.pptx",
path + "testEXCEL.xlsx",
path + "testEXCEL.xls",
path + "testPages.pages",
//path + "testNumbers.numbers",
//path + "testKeynote.key",
path + "/testRTFVarious.rtf",
path + "/complex.mbox",
path + "/test-outlook.msg",
path + "/testEMLX.emlx",
path + "/testRFC822",
path + "/rsstest.rss",
// path + "/testDITA.dita",
path + "testRTFVarious.rtf",
path + "complex.mbox",
path + "test-outlook.msg",
path + "testEMLX.emlx",
path + "testRFC822",
path + "rsstest.rss",
// path + "testDITA.dita",
path + "/testMP3i18n.mp3",
path + "/testAIFF.aif",
path + "/testFLAC.flac",
// path + "/testFLAC.oga",
// path + "/testVORBIS.ogg",
path + "/testMP4.m4a",
path + "/testWAV.wav",
// path + "/testWMA.wma",
path + "testMP3i18n.mp3",
path + "testAIFF.aif",
path + "testFLAC.flac",
// path + "testFLAC.oga",
// path + "testVORBIS.ogg",
path + "testMP4.m4a",
path + "testWAV.wav",
// path + "testWMA.wma",
path + "/testFLV.flv",
// path + "/testWMV.wmv",
path + "testFLV.flv",
// path + "testWMV.wmv",
path + "/testBMP.bmp",
path + "/testPNG.png",
path + "/testPSD.psd",
path + "/testSVG.svg",
path + "/testTIFF.tif",
path + "testBMP.bmp",
path + "testPNG.png",
path + "testPSD.psd",
path + "testSVG.svg",
path + "testTIFF.tif",
// path + "/test-documents.7z",
// path + "/test-documents.cpio",
// path + "/test-documents.tar",
// path + "/test-documents.tbz2",
// path + "/test-documents.tgz",
// path + "/test-documents.zip",
// path + "/test-zip-of-zip.zip",
// path + "/testJAR.jar",
// path + "test-documents.7z",
// path + "test-documents.cpio",
// path + "test-documents.tar",
// path + "test-documents.tbz2",
// path + "test-documents.tgz",
// path + "test-documents.zip",
// path + "test-zip-of-zip.zip",
// path + "testJAR.jar",
// path + "/testKML.kml",
// path + "/testRDF.rdf",
path + "/testVISIO.vsd",
// path + "/testWAR.war",
// path + "/testWindows-x86-32.exe",
// path + "/testWINMAIL.dat",
// path + "/testWMF.wmf",
// path + "testKML.kml",
// path + "testRDF.rdf",
path + "testVISIO.vsd",
// path + "testWAR.war",
// path + "testWindows-x86-32.exe",
// path + "testWINMAIL.dat",
// path + "testWMF.wmf",
};
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}

View File

@ -0,0 +1,151 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.morphlines.solr;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * Small collection of static helpers for recursively removing files and directories.
 * <p>
 * NOTE(review): the method contracts appear to be adapted from Apache Commons IO
 * {@code FileUtils}; confirm before relying on exact parity with that library.
 */
class FileUtils {

  /**
   * Deletes a directory together with everything beneath it.
   * <p>
   * A directory that does not exist is silently ignored. If the directory is
   * itself a symbolic link, its contents are not cleaned; only the link entry
   * is deleted.
   *
   * @param directory directory to delete
   * @throws IOException if the directory (or something inside it) cannot be deleted
   */
  public static void deleteDirectory(File directory) throws IOException {
    if (directory.exists()) {
      // Do not descend through a symlink: only the link itself is removed.
      if (!isSymlink(directory)) {
        cleanDirectory(directory);
      }

      final boolean removed = directory.delete();
      if (!removed) {
        throw new IOException("Unable to delete directory " + directory + ".");
      }
    }
  }

  /**
   * Tells whether the given file is a symbolic link.
   * <p>
   * Only the last path element is examined: the parent directory is
   * canonicalized first, so a symbolic link higher up in the path does not
   * make this method return {@code true}. When the platform file separator is
   * a backslash, the method always returns {@code false}.
   *
   * @param file the file to check, must not be {@code null}
   * @return {@code true} if the file is a symbolic link
   * @throws NullPointerException if {@code file} is {@code null}
   * @throws IOException if resolving the canonical path fails
   */
  public static boolean isSymlink(File file) throws IOException {
    if (file == null) {
      throw new NullPointerException("File must not be null");
    }
    // A backslash separator is treated as "running on Windows", where no
    // symlink detection is attempted.
    if (File.separatorChar == '\\') {
      return false;
    }

    // Resolve the parent so that only the final path element can differ
    // between the canonical and the absolute form.
    final File parentDir = file.getParentFile();
    final File candidate = (parentDir == null)
        ? file
        : new File(parentDir.getCanonicalFile(), file.getName());

    return !candidate.getCanonicalFile().equals(candidate.getAbsoluteFile());
  }

  /**
   * Removes every entry inside a directory while keeping the directory itself.
   * <p>
   * Deletion continues past individual failures; if any entry could not be
   * deleted, the exception from the last failing entry is thrown once all
   * entries have been attempted.
   *
   * @param directory directory to clean
   * @throws IllegalArgumentException if {@code directory} does not exist or is not a directory
   * @throws IOException if the contents cannot be listed or an entry cannot be deleted
   */
  public static void cleanDirectory(File directory) throws IOException {
    if (!directory.exists()) {
      throw new IllegalArgumentException(directory + " does not exist");
    }
    if (!directory.isDirectory()) {
      throw new IllegalArgumentException(directory + " is not a directory");
    }

    final File[] children = directory.listFiles();
    if (children == null) { // null if security restricted
      throw new IOException("Failed to list contents of " + directory);
    }

    IOException lastFailure = null;
    for (int i = 0; i < children.length; i++) {
      try {
        forceDelete(children[i]);
      } catch (IOException e) {
        // Remember the failure but keep deleting the remaining entries.
        lastFailure = e;
      }
    }

    if (lastFailure != null) {
      throw lastFailure;
    }
  }

  /**
   * Deletes a file, or a directory including all of its sub-directories.
   * <p>
   * Differences from {@link File#delete()}:
   * <ul>
   * <li>A directory to be deleted does not have to be empty.</li>
   * <li>Failure is reported through an exception instead of a boolean result.</li>
   * </ul>
   *
   * @param file file or directory to delete, must not be {@code null}
   * @throws NullPointerException if {@code file} is {@code null}
   * @throws FileNotFoundException if the file did not exist
   * @throws IOException if deletion is unsuccessful
   */
  public static void forceDelete(File file) throws IOException {
    if (file.isDirectory()) {
      deleteDirectory(file);
      return;
    }

    // Sample existence before deleting so a failure can be classified afterwards.
    final boolean existedBefore = file.exists();
    if (file.delete()) {
      return;
    }
    if (existedBefore) {
      throw new IOException("Unable to delete file: " + file);
    }
    throw new FileNotFoundException("File does not exist: " + file);
  }

}

View File

@ -16,16 +16,10 @@
*/
package org.apache.solr.morphlines.solr;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.MorphlineRuntimeException;
import org.kitesdk.morphline.base.Configs;
import java.io.File;
import java.io.IOException;
import com.google.common.base.Preconditions;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigRenderOptions;
import com.typesafe.config.ConfigUtil;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
@ -35,16 +29,21 @@ import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.SystemIdResolver;
import org.apache.zookeeper.KeeperException;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.MorphlineRuntimeException;
import org.kitesdk.morphline.base.Configs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigRenderOptions;
import com.typesafe.config.ConfigUtil;
/**
* Set of configuration parameters that identify the location and schema of a Solr server or
@ -120,53 +119,65 @@ public class SolrLocator {
}
}
// If solrHomeDir isn't defined and zkHost and collectionName are defined
// then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
String mySolrHomeDir = solrHomeDir;
if (solrHomeDir == null || solrHomeDir.length() == 0) {
if (zkHost == null || zkHost.length() == 0) {
// TODO: implement download from solrUrl if specified
throw new MorphlineCompilationException(
"Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'",
config);
}
if (collectionName == null || collectionName.length() == 0) {
throw new MorphlineCompilationException(
"Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
}
ZooKeeperDownloader zki = new ZooKeeperDownloader();
SolrZkClient zkClient = zki.getZkClient(zkHost);
try {
String configName = zki.readConfigName(zkClient, collectionName);
File downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath();
} catch (KeeperException e) {
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
} catch (InterruptedException e) {
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
} catch (IOException e) {
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
} finally {
zkClient.close();
}
}
LOG.debug("SolrLocator loading IndexSchema from dir {}", mySolrHomeDir);
File downloadedSolrHomeDir = null;
try {
SolrResourceLoader loader = new SolrResourceLoader(mySolrHomeDir);
SolrConfig solrConfig = new SolrConfig(loader, "solrconfig.xml", null);
InputSource is = new InputSource(loader.openSchema("schema.xml"));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
// If solrHomeDir isn't defined and zkHost and collectionName are defined
// then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
String mySolrHomeDir = solrHomeDir;
if (solrHomeDir == null || solrHomeDir.length() == 0) {
if (zkHost == null || zkHost.length() == 0) {
// TODO: implement download from solrUrl if specified
throw new MorphlineCompilationException(
"Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'",
config);
}
if (collectionName == null || collectionName.length() == 0) {
throw new MorphlineCompilationException(
"Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
}
ZooKeeperDownloader zki = new ZooKeeperDownloader();
SolrZkClient zkClient = zki.getZkClient(zkHost);
try {
String configName = zki.readConfigName(zkClient, collectionName);
downloadedSolrHomeDir = Files.createTempDir();
downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName, downloadedSolrHomeDir);
mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath();
} catch (KeeperException e) {
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
} catch (InterruptedException e) {
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
} catch (IOException e) {
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
} finally {
zkClient.close();
}
}
IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
validateSchema(schema);
return schema;
} catch (ParserConfigurationException e) {
throw new MorphlineRuntimeException(e);
} catch (IOException e) {
throw new MorphlineRuntimeException(e);
} catch (SAXException e) {
throw new MorphlineRuntimeException(e);
LOG.debug("SolrLocator loading IndexSchema from dir {}", mySolrHomeDir);
try {
SolrResourceLoader loader = new SolrResourceLoader(mySolrHomeDir);
SolrConfig solrConfig = new SolrConfig(loader, "solrconfig.xml", null);
InputSource is = new InputSource(loader.openSchema("schema.xml"));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
validateSchema(schema);
return schema;
} catch (ParserConfigurationException e) {
throw new MorphlineRuntimeException(e);
} catch (IOException e) {
throw new MorphlineRuntimeException(e);
} catch (SAXException e) {
throw new MorphlineRuntimeException(e);
}
} finally {
if (downloadedSolrHomeDir != null) {
try {
FileUtils.deleteDirectory(downloadedSolrHomeDir);
} catch (IOException e) {
LOG.warn("Cannot delete tmp directory", e);
}
}
}
}

View File

@ -31,6 +31,7 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;
/**
@ -100,10 +101,10 @@ final class ZooKeeperDownloader {
/**
* Download and return the config directory from ZK
*/
public File downloadConfigDir(SolrZkClient zkClient, String configName)
public File downloadConfigDir(SolrZkClient zkClient, String configName, File dir)
throws IOException, InterruptedException, KeeperException {
File dir = Files.createTempDir();
dir.deleteOnExit();
Preconditions.checkArgument(dir.exists());
Preconditions.checkArgument(dir.isDirectory());
ZkController.downloadConfigDir(zkClient, configName, dir);
File confDir = new File(dir, "conf");
if (!confDir.isDirectory()) {
@ -116,7 +117,23 @@ final class ZooKeeperDownloader {
Files.move(dir, confDir);
dir = confDir.getParentFile();
}
verifyConfigDir(confDir);
return dir;
}
/**
 * Sanity-checks a downloaded Solr config directory by verifying that it
 * contains a readable {@code solrconfig.xml} regular file.
 *
 * @param confDir the directory expected to hold {@code solrconfig.xml}
 * @throws IOException if {@code solrconfig.xml} is missing, is not a regular file,
 *         or cannot be read
 */
private void verifyConfigDir(File confDir) throws IOException {
  final File configFile = new File(confDir, "solrconfig.xml");
  final String shortName = configFile.getName();

  // Checks run from least to most specific so the reported reason is precise.
  if (!configFile.exists()) {
    throw new IOException("Detected invalid Solr config dir in ZooKeeper - Reason: File not found: "
        + shortName);
  } else if (!configFile.isFile()) {
    throw new IOException("Detected invalid Solr config dir in ZooKeeper - Reason: Not a file: "
        + shortName);
  } else if (!configFile.canRead()) {
    throw new IOException("Insufficient permissions to read file: " + configFile);
  }
}
}

View File

@ -54,6 +54,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.MetricRegistry;
import com.google.common.base.Joiner;
import com.google.common.io.Files;
import com.typesafe.config.Config;
@ -85,10 +86,11 @@ public class AbstractSolrMorphlineTestBase extends SolrTestCaseJ4 {
}
protected static void myInitCore(String baseDirName) throws Exception {
Joiner joiner = Joiner.on(File.separator);
initCore(
RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/solrconfig.xml",
RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/schema.xml",
RESOURCES_DIR + "/" + baseDirName
joiner.join(RESOURCES_DIR, baseDirName, "collection1", "conf", "solrconfig.xml"),
joiner.join(RESOURCES_DIR, baseDirName, "collection1", "conf", "schema.xml"),
joiner.join(RESOURCES_DIR, baseDirName)
);
}

View File

@ -42,6 +42,7 @@ import org.kitesdk.morphline.base.Compiler;
import org.kitesdk.morphline.base.FaultTolerance;
import org.kitesdk.morphline.base.Notifications;
import org.kitesdk.morphline.stdlib.PipeBuilder;
import com.codahale.metrics.MetricRegistry;
import com.google.common.collect.ListMultimap;
import com.typesafe.config.Config;

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.morphlines.solr;
import java.io.File;
import java.util.Arrays;
import org.apache.lucene.util.Constants;
@ -57,7 +58,7 @@ public class SolrMorphlineTest extends AbstractSolrMorphlineTestBase {
@Test
public void testTokenizeText() throws Exception {
morphline = createMorphline("test-morphlines/tokenizeText");
morphline = createMorphline("test-morphlines" + File.separator + "tokenizeText");
for (int i = 0; i < 3; i++) {
Record record = new Record();
record.put(Fields.MESSAGE, "Hello World!");

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.morphlines.solr;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
@ -31,6 +32,9 @@ import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
@ -39,9 +43,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
@ThreadLeakAction({Action.WARN})
@ThreadLeakLingering(linger = 0)
@ -64,7 +65,7 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
createAlias("aliascollection", "collection1");
morphline = parse("test-morphlines/loadSolrBasic", "aliascollection");
morphline = parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection");
Record record = new Record();
record.put(Fields.ID, "id0-innsbruck");
record.put("text", "mytext");
@ -113,7 +114,7 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
createAlias("aliascollection", "collection1,collection2");
try {
parse("test-morphlines/loadSolrBasic", "aliascollection");
parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection");
fail("Expected IAE because update alias maps to multiple collections");
} catch (IllegalArgumentException e) {

View File

@ -35,6 +35,9 @@ import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.junit.BeforeClass;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
@ -43,9 +46,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;
@ -65,12 +66,13 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
@Override
public void doTest() throws Exception {
File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
Joiner joiner = Joiner.on(File.separator);
File file = new File(joiner.join(RESOURCES_DIR, "test-documents", "sample-statuses-20120906-141433-medium.avro"));
waitForRecoveriesToFinish(false);
// load avro records via morphline and zk into solr
morphline = parse("test-morphlines/tutorialReadAvroContainer");
morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer");
Record record = new Record();
byte[] body = Files.toByteArray(file);
record.put(Fields.ATTACHMENT_BODY, body);

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.morphlines.solr;
import java.io.File;
import java.util.Iterator;
import org.apache.lucene.util.Constants;
@ -25,6 +26,9 @@ import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.junit.BeforeClass;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
@ -33,9 +37,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
@ThreadLeakAction({Action.WARN})
@ThreadLeakLingering(linger = 0)
@ -56,7 +57,7 @@ public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
waitForRecoveriesToFinish(false);
morphline = parse("test-morphlines/loadSolrBasic");
morphline = parse("test-morphlines" + File.separator + "loadSolrBasic");
Record record = new Record();
record.put(Fields.ID, "id0-innsbruck");
record.put("text", "mytext");

View File

@ -1 +0,0 @@
ab317793098d82d2a4259b5fe80bcde9ef745d5d

View File

@ -0,0 +1 @@
ac24f3b61248bd7527ba5d0ac55ab4a6c1e99dec

View File

@ -1 +0,0 @@
295045584531070ba9770db1fbd7bdb1518ece4e

View File

@ -1 +0,0 @@
a1752aeef0a9f66840b7c1a33613ac9d9b07052b

View File

@ -0,0 +1 @@
13a473a5f3fc4b9c0cb6348313c9595219a5593b

View File

@ -0,0 +1 @@
6dfd4325dcd202e24104ff1eb604fcf4322b917c

View File

@ -1 +0,0 @@
4dc6c186534f8f3e9555b050e20066a58159e880

View File

@ -0,0 +1 @@
16d6b1b848e7f3de2f4de79bbe12ef02ca29ad7c

View File

@ -1 +0,0 @@
d672b83e1e779b2c967aa457baab89e1b544c18e

View File

@ -0,0 +1 @@
c9ebdc525368e809f705495aff50ad1a21725a07

View File

@ -1 +0,0 @@
16c27edf447563b5674138a2fcede66298e9b8f9

View File

@ -0,0 +1 @@
296ffafea1f5160e1db56e6daa6dc86be7a9d8b4

View File

@ -1 +0,0 @@
aee7a5f61f66e1e39ff0ade73c07ab68c2bcd9b6

View File

@ -0,0 +1 @@
9861e7400a27214ece16cb94cb6637eef3284a21

View File

@ -1 +0,0 @@
36a014a433ca59935f0eb1ea7ace3111a3bd20bf

View File

@ -0,0 +1 @@
d21a33105ce6df0fd519c8fb1882549dbafff21e

View File

@ -1 +0,0 @@
d967d2e4e6517e87316a24e1931243436bbc1ba0

View File

@ -0,0 +1 @@
299e78d3fbb8e61c950d0a589fd31689753997c4