mirror of https://github.com/apache/lucene.git
SOLR-5729: Merge in bug fixes and minor improvements for mapreduce and morphlines contribs.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1569794 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
86ee00c352
commit
c4b4582dda
lucene
solr
CHANGES.txt
contrib
morphlines-cell/src
java/org/apache/solr/morphlines/cell
test/org/apache/solr/morphlines/cell
morphlines-core/src
java/org/apache/solr/morphlines/solr
test/org/apache/solr/morphlines/solr
licenses
kite-morphlines-avro-0.10.0.jar.sha1
kite-morphlines-avro-0.11.0.jar.sha1
kite-morphlines-core-0.10.0-tests.jar.sha1
kite-morphlines-core-0.10.0.jar.sha1
kite-morphlines-core-0.11.0-tests.jar.sha1
kite-morphlines-core-0.11.0.jar.sha1
kite-morphlines-hadoop-sequencefile-0.10.0.jar.sha1
kite-morphlines-hadoop-sequencefile-0.11.0.jar.sha1
kite-morphlines-json-0.10.0.jar.sha1
kite-morphlines-json-0.11.0.jar.sha1
kite-morphlines-saxon-0.10.0.jar.sha1
kite-morphlines-saxon-0.11.0.jar.sha1
kite-morphlines-tika-core-0.10.0.jar.sha1
kite-morphlines-tika-core-0.11.0.jar.sha1
kite-morphlines-tika-decompress-0.10.0.jar.sha1
kite-morphlines-tika-decompress-0.11.0.jar.sha1
kite-morphlines-twitter-0.10.0.jar.sha1
kite-morphlines-twitter-0.11.0.jar.sha1
|
@ -191,7 +191,7 @@ org.gagravarr.vorbis.java.version = 0.1
|
||||||
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
|
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
|
||||||
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
|
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
|
||||||
|
|
||||||
org.kitesdk.kite-morphlines.version = 0.10.0
|
org.kitesdk.kite-morphlines.version = 0.11.0
|
||||||
/org.kitesdk/kite-morphlines-avro = ${org.kitesdk.kite-morphlines.version}
|
/org.kitesdk/kite-morphlines-avro = ${org.kitesdk.kite-morphlines.version}
|
||||||
/org.kitesdk/kite-morphlines-core = ${org.kitesdk.kite-morphlines.version}
|
/org.kitesdk/kite-morphlines-core = ${org.kitesdk.kite-morphlines.version}
|
||||||
/org.kitesdk/kite-morphlines-hadoop-sequencefile = ${org.kitesdk.kite-morphlines.version}
|
/org.kitesdk/kite-morphlines-hadoop-sequencefile = ${org.kitesdk.kite-morphlines.version}
|
||||||
|
|
|
@ -56,6 +56,30 @@ Other Changes
|
||||||
|
|
||||||
* SOLR-4792: Stop shipping a .war. (Robert Muir)
|
* SOLR-4792: Stop shipping a .war. (Robert Muir)
|
||||||
|
|
||||||
|
================== 4.8.0 ==================
|
||||||
|
|
||||||
|
Versions of Major Components
|
||||||
|
---------------------
|
||||||
|
Apache Tika 1.4
|
||||||
|
Carrot2 3.8.0
|
||||||
|
Velocity 1.7 and Velocity Tools 2.0
|
||||||
|
Apache UIMA 2.3.1
|
||||||
|
Apache ZooKeeper 3.4.5
|
||||||
|
|
||||||
|
|
||||||
|
Detailed Change List
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
New Features
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Bug Fixes
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
* SOLR-5729: Merge in bug fixes and minor improvements for mapreduce and
|
||||||
|
morphlines contribs. (Patrick Hunt via Mark Miller)
|
||||||
|
|
||||||
|
|
||||||
================== 4.7.0 ==================
|
================== 4.7.0 ==================
|
||||||
|
|
||||||
Versions of Major Components
|
Versions of Major Components
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.solr.morphlines.cell;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -45,15 +44,12 @@ import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.sax.TeeContentHandler;
|
|
||||||
import org.apache.tika.sax.XHTMLContentHandler;
|
import org.apache.tika.sax.XHTMLContentHandler;
|
||||||
import org.apache.tika.sax.xpath.Matcher;
|
import org.apache.tika.sax.xpath.Matcher;
|
||||||
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
||||||
import org.apache.tika.sax.xpath.XPathParser;
|
import org.apache.tika.sax.xpath.XPathParser;
|
||||||
import org.apache.xml.serialize.OutputFormat;
|
import org.apache.xml.serialize.OutputFormat;
|
||||||
import org.apache.xml.serialize.XMLSerializer;
|
import org.apache.xml.serialize.XMLSerializer;
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import org.kitesdk.morphline.api.Command;
|
import org.kitesdk.morphline.api.Command;
|
||||||
import org.kitesdk.morphline.api.CommandBuilder;
|
import org.kitesdk.morphline.api.CommandBuilder;
|
||||||
|
@ -64,6 +60,9 @@ import org.kitesdk.morphline.api.Record;
|
||||||
import org.kitesdk.morphline.base.Configs;
|
import org.kitesdk.morphline.base.Configs;
|
||||||
import org.kitesdk.morphline.base.Fields;
|
import org.kitesdk.morphline.base.Fields;
|
||||||
import org.kitesdk.morphline.stdio.AbstractParser;
|
import org.kitesdk.morphline.stdio.AbstractParser;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.collect.ArrayListMultimap;
|
import com.google.common.collect.ArrayListMultimap;
|
||||||
|
@ -102,6 +101,7 @@ public final class SolrCellBuilder implements CommandBuilder {
|
||||||
private final String xpathExpr;
|
private final String xpathExpr;
|
||||||
private final List<Parser> parsers = new ArrayList();
|
private final List<Parser> parsers = new ArrayList();
|
||||||
private final SolrContentHandlerFactory solrContentHandlerFactory;
|
private final SolrContentHandlerFactory solrContentHandlerFactory;
|
||||||
|
private final Locale locale;
|
||||||
|
|
||||||
private final SolrParams solrParams;
|
private final SolrParams solrParams;
|
||||||
private final Map<MediaType, Parser> mediaTypeToParserMap;
|
private final Map<MediaType, Parser> mediaTypeToParserMap;
|
||||||
|
@ -163,6 +163,8 @@ public final class SolrCellBuilder implements CommandBuilder {
|
||||||
}
|
}
|
||||||
this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config);
|
this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config);
|
||||||
|
|
||||||
|
this.locale = getLocale(getConfigs().getString(config, "locale", ""));
|
||||||
|
|
||||||
this.mediaTypeToParserMap = new HashMap<MediaType, Parser>();
|
this.mediaTypeToParserMap = new HashMap<MediaType, Parser>();
|
||||||
//MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize()
|
//MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize()
|
||||||
|
|
||||||
|
@ -222,6 +224,7 @@ public final class SolrCellBuilder implements CommandBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
ParseContext parseContext = new ParseContext();
|
ParseContext parseContext = new ParseContext();
|
||||||
|
parseContext.set(Locale.class, locale);
|
||||||
|
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
for (Entry<String, Object> entry : record.getFields().entries()) {
|
for (Entry<String, Object> entry : record.getFields().entries()) {
|
||||||
|
@ -233,12 +236,6 @@ public final class SolrCellBuilder implements CommandBuilder {
|
||||||
inputStream = TikaInputStream.get(inputStream);
|
inputStream = TikaInputStream.get(inputStream);
|
||||||
|
|
||||||
ContentHandler parsingHandler = handler;
|
ContentHandler parsingHandler = handler;
|
||||||
StringWriter debugWriter = null;
|
|
||||||
if (LOG.isTraceEnabled()) {
|
|
||||||
debugWriter = new StringWriter();
|
|
||||||
ContentHandler serializer = new XMLSerializer(debugWriter, new OutputFormat("XML", "UTF-8", true));
|
|
||||||
parsingHandler = new TeeContentHandler(parsingHandler, serializer);
|
|
||||||
}
|
|
||||||
|
|
||||||
// String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()";
|
// String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()";
|
||||||
if (xpathExpr != null) {
|
if (xpathExpr != null) {
|
||||||
|
@ -255,8 +252,6 @@ public final class SolrCellBuilder implements CommandBuilder {
|
||||||
} catch (TikaException e) {
|
} catch (TikaException e) {
|
||||||
throw new MorphlineRuntimeException("Cannot parse", e);
|
throw new MorphlineRuntimeException("Cannot parse", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.trace("debug XML doc: {}", debugWriter);
|
|
||||||
} finally {
|
} finally {
|
||||||
if (inputStream != null) {
|
if (inputStream != null) {
|
||||||
Closeables.closeQuietly(inputStream);
|
Closeables.closeQuietly(inputStream);
|
||||||
|
@ -336,6 +331,18 @@ public final class SolrCellBuilder implements CommandBuilder {
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Locale getLocale(String name) {
|
||||||
|
for (Locale locale : Locale.getAvailableLocales()) {
|
||||||
|
if (locale.toString().equals(name)) {
|
||||||
|
return locale;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert Locale.ROOT.toString().equals("");
|
||||||
|
if (name.equals(Locale.ROOT.toString())) {
|
||||||
|
return Locale.ROOT;
|
||||||
|
}
|
||||||
|
throw new MorphlineCompilationException("Unknown locale: " + name, getConfig());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
|
@ -50,23 +50,23 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
||||||
String path = RESOURCES_DIR + "/test-documents";
|
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
|
||||||
expectedRecords.put(path + "/sample-statuses-20120906-141433.avro", 2);
|
expectedRecords.put(path + "sample-statuses-20120906-141433.avro", 2);
|
||||||
expectedRecords.put(path + "/sample-statuses-20120906-141433", 2);
|
expectedRecords.put(path + "sample-statuses-20120906-141433", 2);
|
||||||
expectedRecords.put(path + "/sample-statuses-20120906-141433.gz", 2);
|
expectedRecords.put(path + "sample-statuses-20120906-141433.gz", 2);
|
||||||
expectedRecords.put(path + "/sample-statuses-20120906-141433.bz2", 2);
|
expectedRecords.put(path + "sample-statuses-20120906-141433.bz2", 2);
|
||||||
expectedRecords.put(path + "/cars.csv", 6);
|
expectedRecords.put(path + "cars.csv", 6);
|
||||||
expectedRecords.put(path + "/cars.csv.gz", 6);
|
expectedRecords.put(path + "cars.csv.gz", 6);
|
||||||
expectedRecords.put(path + "/cars.tar.gz", 4);
|
expectedRecords.put(path + "cars.tar.gz", 4);
|
||||||
expectedRecords.put(path + "/cars.tsv", 6);
|
expectedRecords.put(path + "cars.tsv", 6);
|
||||||
expectedRecords.put(path + "/cars.ssv", 6);
|
expectedRecords.put(path + "cars.ssv", 6);
|
||||||
expectedRecords.put(path + "/test-documents.7z", 9);
|
expectedRecords.put(path + "test-documents.7z", 9);
|
||||||
expectedRecords.put(path + "/test-documents.cpio", 9);
|
expectedRecords.put(path + "test-documents.cpio", 9);
|
||||||
expectedRecords.put(path + "/test-documents.tar", 9);
|
expectedRecords.put(path + "test-documents.tar", 9);
|
||||||
expectedRecords.put(path + "/test-documents.tbz2", 9);
|
expectedRecords.put(path + "test-documents.tbz2", 9);
|
||||||
expectedRecords.put(path + "/test-documents.tgz", 9);
|
expectedRecords.put(path + "test-documents.tgz", 9);
|
||||||
expectedRecords.put(path + "/test-documents.zip", 9);
|
expectedRecords.put(path + "test-documents.zip", 9);
|
||||||
expectedRecords.put(path + "/multiline-stacktrace.log", 4);
|
expectedRecords.put(path + "multiline-stacktrace.log", 4);
|
||||||
|
|
||||||
{
|
{
|
||||||
Map<String, Object> record = new LinkedHashMap();
|
Map<String, Object> record = new LinkedHashMap();
|
||||||
|
@ -81,7 +81,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
String file = path + "/testWORD_various.doc";
|
String file = path + "testWORD_various.doc";
|
||||||
Map<String, Object> record = new LinkedHashMap();
|
Map<String, Object> record = new LinkedHashMap();
|
||||||
record.put("ignored__attachment_mimetype", "application/msword");
|
record.put("ignored__attachment_mimetype", "application/msword");
|
||||||
record.put("ignored_author", "Michael McCandless");
|
record.put("ignored_author", "Michael McCandless");
|
||||||
|
@ -94,7 +94,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
String file = path + "/testPDF.pdf";
|
String file = path + "testPDF.pdf";
|
||||||
Map<String, Object> record = new LinkedHashMap();
|
Map<String, Object> record = new LinkedHashMap();
|
||||||
record.put("ignored__attachment_mimetype", "application/pdf");
|
record.put("ignored__attachment_mimetype", "application/pdf");
|
||||||
record.put("ignored_author", "Bertrand Delacrétaz");
|
record.put("ignored_author", "Bertrand Delacrétaz");
|
||||||
|
@ -106,7 +106,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
String file = path + "/email.eml";
|
String file = path + "email.eml";
|
||||||
Map<String, Object> record = new LinkedHashMap();
|
Map<String, Object> record = new LinkedHashMap();
|
||||||
String name = "Patrick Foo <foo@cloudera.com>";
|
String name = "Patrick Foo <foo@cloudera.com>";
|
||||||
record.put("ignored__attachment_mimetype", "message/rfc822");
|
record.put("ignored__attachment_mimetype", "message/rfc822");
|
||||||
|
@ -127,7 +127,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
String file = path + "/testEXCEL.xlsx";
|
String file = path + "testEXCEL.xlsx";
|
||||||
Map<String, Object> record = new LinkedHashMap();
|
Map<String, Object> record = new LinkedHashMap();
|
||||||
record.put("ignored__attachment_mimetype", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
|
record.put("ignored__attachment_mimetype", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
|
||||||
record.put("ignored_author", "Keith Bennett");
|
record.put("ignored_author", "Keith Bennett");
|
||||||
|
@ -142,23 +142,23 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSolrCellJPGCompressed() throws Exception {
|
public void testSolrCellJPGCompressed() throws Exception {
|
||||||
morphline = createMorphline("test-morphlines/solrCellJPGCompressed");
|
morphline = createMorphline("test-morphlines" + File.separator + "solrCellJPGCompressed");
|
||||||
String path = RESOURCES_DIR + "/test-documents";
|
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
|
||||||
String[] files = new String[] {
|
String[] files = new String[] {
|
||||||
path + "/testJPEG_EXIF.jpg",
|
path + "testJPEG_EXIF.jpg",
|
||||||
path + "/testJPEG_EXIF.jpg.gz",
|
path + "testJPEG_EXIF.jpg.gz",
|
||||||
path + "/testJPEG_EXIF.jpg.tar.gz",
|
path + "testJPEG_EXIF.jpg.tar.gz",
|
||||||
//path + "/jpeg2000.jp2",
|
//path + "jpeg2000.jp2",
|
||||||
};
|
};
|
||||||
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSolrCellXML() throws Exception {
|
public void testSolrCellXML() throws Exception {
|
||||||
morphline = createMorphline("test-morphlines/solrCellXML");
|
morphline = createMorphline("test-morphlines" + File.separator + "solrCellXML");
|
||||||
String path = RESOURCES_DIR + "/test-documents";
|
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
|
||||||
String[] files = new String[] {
|
String[] files = new String[] {
|
||||||
path + "/testXML2.xml",
|
path + "testXML2.xml",
|
||||||
};
|
};
|
||||||
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
||||||
}
|
}
|
||||||
|
@ -168,27 +168,27 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
|
AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
|
||||||
|
|
||||||
morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
|
morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
|
||||||
String path = RESOURCES_DIR + "/test-documents";
|
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
|
||||||
String[] files = new String[] {
|
String[] files = new String[] {
|
||||||
path + "/testBMPfp.txt",
|
path + "testBMPfp.txt",
|
||||||
path + "/boilerplate.html",
|
path + "boilerplate.html",
|
||||||
path + "/NullHeader.docx",
|
path + "NullHeader.docx",
|
||||||
path + "/testWORD_various.doc",
|
path + "testWORD_various.doc",
|
||||||
path + "/testPDF.pdf",
|
path + "testPDF.pdf",
|
||||||
path + "/testJPEG_EXIF.jpg",
|
path + "testJPEG_EXIF.jpg",
|
||||||
path + "/testJPEG_EXIF.jpg.gz",
|
path + "testJPEG_EXIF.jpg.gz",
|
||||||
path + "/testJPEG_EXIF.jpg.tar.gz",
|
path + "testJPEG_EXIF.jpg.tar.gz",
|
||||||
path + "/testXML.xml",
|
path + "testXML.xml",
|
||||||
path + "/cars.csv",
|
path + "cars.csv",
|
||||||
// path + "/cars.tsv",
|
// path + "cars.tsv",
|
||||||
// path + "/cars.ssv",
|
// path + "cars.ssv",
|
||||||
path + "/cars.csv.gz",
|
path + "cars.csv.gz",
|
||||||
path + "/cars.tar.gz",
|
path + "cars.tar.gz",
|
||||||
path + "/sample-statuses-20120906-141433.avro",
|
path + "sample-statuses-20120906-141433.avro",
|
||||||
path + "/sample-statuses-20120906-141433",
|
path + "sample-statuses-20120906-141433",
|
||||||
path + "/sample-statuses-20120906-141433.gz",
|
path + "sample-statuses-20120906-141433.gz",
|
||||||
path + "/sample-statuses-20120906-141433.bz2",
|
path + "sample-statuses-20120906-141433.bz2",
|
||||||
path + "/email.eml",
|
path + "email.eml",
|
||||||
};
|
};
|
||||||
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
||||||
}
|
}
|
||||||
|
@ -199,58 +199,58 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
|
AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
|
||||||
|
|
||||||
morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
|
morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
|
||||||
String path = RESOURCES_DIR + "/test-documents";
|
String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
|
||||||
String[] files = new String[] {
|
String[] files = new String[] {
|
||||||
path + "/testPPT_various.ppt",
|
path + "testPPT_various.ppt",
|
||||||
path + "/testPPT_various.pptx",
|
path + "testPPT_various.pptx",
|
||||||
path + "/testEXCEL.xlsx",
|
path + "testEXCEL.xlsx",
|
||||||
path + "/testEXCEL.xls",
|
path + "testEXCEL.xls",
|
||||||
path + "/testPages.pages",
|
path + "testPages.pages",
|
||||||
//path + "/testNumbers.numbers",
|
//path + "testNumbers.numbers",
|
||||||
//path + "/testKeynote.key",
|
//path + "testKeynote.key",
|
||||||
|
|
||||||
path + "/testRTFVarious.rtf",
|
path + "testRTFVarious.rtf",
|
||||||
path + "/complex.mbox",
|
path + "complex.mbox",
|
||||||
path + "/test-outlook.msg",
|
path + "test-outlook.msg",
|
||||||
path + "/testEMLX.emlx",
|
path + "testEMLX.emlx",
|
||||||
path + "/testRFC822",
|
path + "testRFC822",
|
||||||
path + "/rsstest.rss",
|
path + "rsstest.rss",
|
||||||
// path + "/testDITA.dita",
|
// path + "testDITA.dita",
|
||||||
|
|
||||||
path + "/testMP3i18n.mp3",
|
path + "testMP3i18n.mp3",
|
||||||
path + "/testAIFF.aif",
|
path + "testAIFF.aif",
|
||||||
path + "/testFLAC.flac",
|
path + "testFLAC.flac",
|
||||||
// path + "/testFLAC.oga",
|
// path + "testFLAC.oga",
|
||||||
// path + "/testVORBIS.ogg",
|
// path + "testVORBIS.ogg",
|
||||||
path + "/testMP4.m4a",
|
path + "testMP4.m4a",
|
||||||
path + "/testWAV.wav",
|
path + "testWAV.wav",
|
||||||
// path + "/testWMA.wma",
|
// path + "testWMA.wma",
|
||||||
|
|
||||||
path + "/testFLV.flv",
|
path + "testFLV.flv",
|
||||||
// path + "/testWMV.wmv",
|
// path + "testWMV.wmv",
|
||||||
|
|
||||||
path + "/testBMP.bmp",
|
path + "testBMP.bmp",
|
||||||
path + "/testPNG.png",
|
path + "testPNG.png",
|
||||||
path + "/testPSD.psd",
|
path + "testPSD.psd",
|
||||||
path + "/testSVG.svg",
|
path + "testSVG.svg",
|
||||||
path + "/testTIFF.tif",
|
path + "testTIFF.tif",
|
||||||
|
|
||||||
// path + "/test-documents.7z",
|
// path + "test-documents.7z",
|
||||||
// path + "/test-documents.cpio",
|
// path + "test-documents.cpio",
|
||||||
// path + "/test-documents.tar",
|
// path + "test-documents.tar",
|
||||||
// path + "/test-documents.tbz2",
|
// path + "test-documents.tbz2",
|
||||||
// path + "/test-documents.tgz",
|
// path + "test-documents.tgz",
|
||||||
// path + "/test-documents.zip",
|
// path + "test-documents.zip",
|
||||||
// path + "/test-zip-of-zip.zip",
|
// path + "test-zip-of-zip.zip",
|
||||||
// path + "/testJAR.jar",
|
// path + "testJAR.jar",
|
||||||
|
|
||||||
// path + "/testKML.kml",
|
// path + "testKML.kml",
|
||||||
// path + "/testRDF.rdf",
|
// path + "testRDF.rdf",
|
||||||
path + "/testVISIO.vsd",
|
path + "testVISIO.vsd",
|
||||||
// path + "/testWAR.war",
|
// path + "testWAR.war",
|
||||||
// path + "/testWindows-x86-32.exe",
|
// path + "testWindows-x86-32.exe",
|
||||||
// path + "/testWINMAIL.dat",
|
// path + "testWINMAIL.dat",
|
||||||
// path + "/testWMF.wmf",
|
// path + "testWMF.wmf",
|
||||||
};
|
};
|
||||||
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,151 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.morphlines.solr;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Package-private helpers for recursively deleting files and directories.
 * <p>
 * NOTE(review): the method contracts look like they were adapted from Apache
 * Commons IO's {@code FileUtils} - confirm before relying on parity with it.
 */
class FileUtils {

  //-----------------------------------------------------------------------
  /**
   * Deletes a directory recursively.
   * <p>
   * A directory that does not exist is silently ignored. If the given path
   * is a symbolic link, only the link itself is removed; the link target's
   * contents are left untouched.
   *
   * @param directory  directory to delete
   * @throws IOException in case deletion is unsuccessful
   */
  public static void deleteDirectory(File directory) throws IOException {
    if (!directory.exists()) {
      return;
    }

    // Never descend through a symlink: that would wipe the link target.
    if (!isSymlink(directory)) {
      cleanDirectory(directory);
    }

    if (!directory.delete()) {
      String message =
          "Unable to delete directory " + directory + ".";
      throw new IOException(message);
    }
  }

  /**
   * Determines whether the specified file is a Symbolic Link rather than an actual file.
   * <p>
   * Will not return true if there is a Symbolic Link anywhere in the path,
   * only if the specific file is.
   * <p>
   * Always returns {@code false} when the platform's file separator is a
   * backslash (i.e. on Windows).
   *
   * @param file the file to check
   * @return true if the file is a Symbolic Link
   * @throws NullPointerException if the file is {@code null}
   * @throws IOException if an IO error occurs while checking the file
   */
  public static boolean isSymlink(File file) throws IOException {
    if (file == null) {
      throw new NullPointerException("File must not be null");
    }
    if (File.separatorChar == '\\') {
      return false;
    }

    // Canonicalize only the parent, so that a symlink higher up in the path
    // does not make the file itself look like a link.
    File fileInCanonicalDir;
    if (file.getParent() == null) {
      fileInCanonicalDir = file;
    } else {
      File canonicalDir = file.getParentFile().getCanonicalFile();
      fileInCanonicalDir = new File(canonicalDir, file.getName());
    }

    // If resolving the last path element changes the path, it is a link.
    return !fileInCanonicalDir.getCanonicalFile().equals(fileInCanonicalDir.getAbsoluteFile());
  }

  /**
   * Cleans a directory without deleting it.
   * <p>
   * Every entry is attempted even if an earlier one fails. The first failure
   * is thrown once all entries have been tried; any later failures are
   * attached to it as suppressed exceptions so that none is lost.
   *
   * @param directory directory to clean
   * @throws IllegalArgumentException if {@code directory} does not exist or is not a directory
   * @throws IOException in case cleaning is unsuccessful
   */
  public static void cleanDirectory(File directory) throws IOException {
    if (!directory.exists()) {
      String message = directory + " does not exist";
      throw new IllegalArgumentException(message);
    }

    if (!directory.isDirectory()) {
      String message = directory + " is not a directory";
      throw new IllegalArgumentException(message);
    }

    File[] files = directory.listFiles();
    if (files == null) {  // null if security restricted
      throw new IOException("Failed to list contents of " + directory);
    }

    IOException failure = null;
    for (File file : files) {
      try {
        forceDelete(file);
      } catch (IOException ioe) {
        if (failure == null) {
          failure = ioe;
        } else {
          failure.addSuppressed(ioe);
        }
      }
    }

    if (failure != null) {
      throw failure;
    }
  }

  //-----------------------------------------------------------------------
  /**
   * Deletes a file. If file is a directory, delete it and all sub-directories.
   * <p>
   * The difference between File.delete() and this method are:
   * <ul>
   * <li>A directory to be deleted does not have to be empty.</li>
   * <li>You get exceptions when a file or directory cannot be deleted.
   *      (java.io.File methods returns a boolean)</li>
   * </ul>
   *
   * @param file  file or directory to delete, must not be <code>null</code>
   * @throws NullPointerException if the directory is <code>null</code>
   * @throws FileNotFoundException if the file was not found
   * @throws IOException in case deletion is unsuccessful
   */
  public static void forceDelete(File file) throws IOException {
    if (file.isDirectory()) {
      deleteDirectory(file);
    } else {
      // Sample existence before deleting so a failure can be classified.
      boolean filePresent = file.exists();
      if (!file.delete()) {
        if (!filePresent) {
          throw new FileNotFoundException("File does not exist: " + file);
        }
        String message =
            "Unable to delete file: " + file;
        throw new IOException(message);
      }
    }
  }

}
|
|
@ -16,16 +16,10 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.morphlines.solr;
|
package org.apache.solr.morphlines.solr;
|
||||||
|
|
||||||
import org.kitesdk.morphline.api.MorphlineCompilationException;
|
import java.io.File;
|
||||||
import org.kitesdk.morphline.api.MorphlineContext;
|
import java.io.IOException;
|
||||||
import org.kitesdk.morphline.api.MorphlineRuntimeException;
|
|
||||||
import org.kitesdk.morphline.base.Configs;
|
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
import com.typesafe.config.Config;
|
|
||||||
import com.typesafe.config.ConfigFactory;
|
|
||||||
import com.typesafe.config.ConfigRenderOptions;
|
|
||||||
import com.typesafe.config.ConfigUtil;
|
|
||||||
|
|
||||||
import org.apache.solr.client.solrj.SolrServer;
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
||||||
|
@ -35,16 +29,21 @@ import org.apache.solr.core.SolrResourceLoader;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.util.SystemIdResolver;
|
import org.apache.solr.util.SystemIdResolver;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
import org.kitesdk.morphline.api.MorphlineCompilationException;
|
||||||
|
import org.kitesdk.morphline.api.MorphlineContext;
|
||||||
|
import org.kitesdk.morphline.api.MorphlineRuntimeException;
|
||||||
|
import org.kitesdk.morphline.base.Configs;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.io.Files;
|
||||||
import java.io.File;
|
import com.typesafe.config.Config;
|
||||||
import java.io.IOException;
|
import com.typesafe.config.ConfigFactory;
|
||||||
import java.net.MalformedURLException;
|
import com.typesafe.config.ConfigRenderOptions;
|
||||||
|
import com.typesafe.config.ConfigUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set of configuration parameters that identify the location and schema of a Solr server or
|
* Set of configuration parameters that identify the location and schema of a Solr server or
|
||||||
|
@ -119,54 +118,66 @@ public class SolrLocator {
|
||||||
return schema;
|
return schema;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If solrHomeDir isn't defined and zkHost and collectionName are defined
|
File downloadedSolrHomeDir = null;
|
||||||
// then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
|
|
||||||
String mySolrHomeDir = solrHomeDir;
|
|
||||||
if (solrHomeDir == null || solrHomeDir.length() == 0) {
|
|
||||||
if (zkHost == null || zkHost.length() == 0) {
|
|
||||||
// TODO: implement download from solrUrl if specified
|
|
||||||
throw new MorphlineCompilationException(
|
|
||||||
"Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'",
|
|
||||||
config);
|
|
||||||
}
|
|
||||||
if (collectionName == null || collectionName.length() == 0) {
|
|
||||||
throw new MorphlineCompilationException(
|
|
||||||
"Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
|
|
||||||
}
|
|
||||||
ZooKeeperDownloader zki = new ZooKeeperDownloader();
|
|
||||||
SolrZkClient zkClient = zki.getZkClient(zkHost);
|
|
||||||
try {
|
|
||||||
String configName = zki.readConfigName(zkClient, collectionName);
|
|
||||||
File downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
|
|
||||||
mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath();
|
|
||||||
} catch (KeeperException e) {
|
|
||||||
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
|
|
||||||
} finally {
|
|
||||||
zkClient.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG.debug("SolrLocator loading IndexSchema from dir {}", mySolrHomeDir);
|
|
||||||
try {
|
try {
|
||||||
SolrResourceLoader loader = new SolrResourceLoader(mySolrHomeDir);
|
// If solrHomeDir isn't defined and zkHost and collectionName are defined
|
||||||
SolrConfig solrConfig = new SolrConfig(loader, "solrconfig.xml", null);
|
// then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
|
||||||
InputSource is = new InputSource(loader.openSchema("schema.xml"));
|
String mySolrHomeDir = solrHomeDir;
|
||||||
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
|
if (solrHomeDir == null || solrHomeDir.length() == 0) {
|
||||||
|
if (zkHost == null || zkHost.length() == 0) {
|
||||||
|
// TODO: implement download from solrUrl if specified
|
||||||
|
throw new MorphlineCompilationException(
|
||||||
|
"Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'",
|
||||||
|
config);
|
||||||
|
}
|
||||||
|
if (collectionName == null || collectionName.length() == 0) {
|
||||||
|
throw new MorphlineCompilationException(
|
||||||
|
"Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
|
||||||
|
}
|
||||||
|
ZooKeeperDownloader zki = new ZooKeeperDownloader();
|
||||||
|
SolrZkClient zkClient = zki.getZkClient(zkHost);
|
||||||
|
try {
|
||||||
|
String configName = zki.readConfigName(zkClient, collectionName);
|
||||||
|
downloadedSolrHomeDir = Files.createTempDir();
|
||||||
|
downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName, downloadedSolrHomeDir);
|
||||||
|
mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath();
|
||||||
|
} catch (KeeperException e) {
|
||||||
|
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
|
||||||
|
} finally {
|
||||||
|
zkClient.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
|
LOG.debug("SolrLocator loading IndexSchema from dir {}", mySolrHomeDir);
|
||||||
validateSchema(schema);
|
try {
|
||||||
return schema;
|
SolrResourceLoader loader = new SolrResourceLoader(mySolrHomeDir);
|
||||||
} catch (ParserConfigurationException e) {
|
SolrConfig solrConfig = new SolrConfig(loader, "solrconfig.xml", null);
|
||||||
throw new MorphlineRuntimeException(e);
|
InputSource is = new InputSource(loader.openSchema("schema.xml"));
|
||||||
} catch (IOException e) {
|
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
|
||||||
throw new MorphlineRuntimeException(e);
|
|
||||||
} catch (SAXException e) {
|
IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
|
||||||
throw new MorphlineRuntimeException(e);
|
validateSchema(schema);
|
||||||
|
return schema;
|
||||||
|
} catch (ParserConfigurationException e) {
|
||||||
|
throw new MorphlineRuntimeException(e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new MorphlineRuntimeException(e);
|
||||||
|
} catch (SAXException e) {
|
||||||
|
throw new MorphlineRuntimeException(e);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (downloadedSolrHomeDir != null) {
|
||||||
|
try {
|
||||||
|
FileUtils.deleteDirectory(downloadedSolrHomeDir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn("Cannot delete tmp directory", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.zookeeper.KeeperException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.io.Files;
|
import com.google.common.io.Files;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -100,10 +101,10 @@ final class ZooKeeperDownloader {
|
||||||
/**
|
/**
|
||||||
* Download and return the config directory from ZK
|
* Download and return the config directory from ZK
|
||||||
*/
|
*/
|
||||||
public File downloadConfigDir(SolrZkClient zkClient, String configName)
|
public File downloadConfigDir(SolrZkClient zkClient, String configName, File dir)
|
||||||
throws IOException, InterruptedException, KeeperException {
|
throws IOException, InterruptedException, KeeperException {
|
||||||
File dir = Files.createTempDir();
|
Preconditions.checkArgument(dir.exists());
|
||||||
dir.deleteOnExit();
|
Preconditions.checkArgument(dir.isDirectory());
|
||||||
ZkController.downloadConfigDir(zkClient, configName, dir);
|
ZkController.downloadConfigDir(zkClient, configName, dir);
|
||||||
File confDir = new File(dir, "conf");
|
File confDir = new File(dir, "conf");
|
||||||
if (!confDir.isDirectory()) {
|
if (!confDir.isDirectory()) {
|
||||||
|
@ -116,7 +117,23 @@ final class ZooKeeperDownloader {
|
||||||
Files.move(dir, confDir);
|
Files.move(dir, confDir);
|
||||||
dir = confDir.getParentFile();
|
dir = confDir.getParentFile();
|
||||||
}
|
}
|
||||||
|
verifyConfigDir(confDir);
|
||||||
return dir;
|
return dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void verifyConfigDir(File confDir) throws IOException {
|
||||||
|
File solrConfigFile = new File(confDir, "solrconfig.xml");
|
||||||
|
if (!solrConfigFile.exists()) {
|
||||||
|
throw new IOException("Detected invalid Solr config dir in ZooKeeper - Reason: File not found: "
|
||||||
|
+ solrConfigFile.getName());
|
||||||
|
}
|
||||||
|
if (!solrConfigFile.isFile()) {
|
||||||
|
throw new IOException("Detected invalid Solr config dir in ZooKeeper - Reason: Not a file: "
|
||||||
|
+ solrConfigFile.getName());
|
||||||
|
}
|
||||||
|
if (!solrConfigFile.canRead()) {
|
||||||
|
throw new IOException("Insufficient permissions to read file: " + solrConfigFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,6 +54,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.codahale.metrics.MetricRegistry;
|
import com.codahale.metrics.MetricRegistry;
|
||||||
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.io.Files;
|
import com.google.common.io.Files;
|
||||||
import com.typesafe.config.Config;
|
import com.typesafe.config.Config;
|
||||||
|
|
||||||
|
@ -85,10 +86,11 @@ public class AbstractSolrMorphlineTestBase extends SolrTestCaseJ4 {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void myInitCore(String baseDirName) throws Exception {
|
protected static void myInitCore(String baseDirName) throws Exception {
|
||||||
|
Joiner joiner = Joiner.on(File.separator);
|
||||||
initCore(
|
initCore(
|
||||||
RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/solrconfig.xml",
|
joiner.join(RESOURCES_DIR, baseDirName, "collection1", "conf", "solrconfig.xml"),
|
||||||
RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/schema.xml",
|
joiner.join(RESOURCES_DIR, baseDirName, "collection1", "conf", "schema.xml"),
|
||||||
RESOURCES_DIR + "/" + baseDirName
|
joiner.join(RESOURCES_DIR, baseDirName)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,7 @@ import org.kitesdk.morphline.base.Compiler;
|
||||||
import org.kitesdk.morphline.base.FaultTolerance;
|
import org.kitesdk.morphline.base.FaultTolerance;
|
||||||
import org.kitesdk.morphline.base.Notifications;
|
import org.kitesdk.morphline.base.Notifications;
|
||||||
import org.kitesdk.morphline.stdlib.PipeBuilder;
|
import org.kitesdk.morphline.stdlib.PipeBuilder;
|
||||||
|
|
||||||
import com.codahale.metrics.MetricRegistry;
|
import com.codahale.metrics.MetricRegistry;
|
||||||
import com.google.common.collect.ListMultimap;
|
import com.google.common.collect.ListMultimap;
|
||||||
import com.typesafe.config.Config;
|
import com.typesafe.config.Config;
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.morphlines.solr;
|
package org.apache.solr.morphlines.solr;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.util.Constants;
|
import org.apache.lucene.util.Constants;
|
||||||
|
@ -57,7 +58,7 @@ public class SolrMorphlineTest extends AbstractSolrMorphlineTestBase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTokenizeText() throws Exception {
|
public void testTokenizeText() throws Exception {
|
||||||
morphline = createMorphline("test-morphlines/tokenizeText");
|
morphline = createMorphline("test-morphlines" + File.separator + "tokenizeText");
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
Record record = new Record();
|
Record record = new Record();
|
||||||
record.put(Fields.MESSAGE, "Hello World!");
|
record.put(Fields.MESSAGE, "Hello World!");
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.morphlines.solr;
|
package org.apache.solr.morphlines.solr;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
@ -31,6 +32,9 @@ import org.apache.solr.common.params.CollectionParams.CollectionAction;
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
import org.kitesdk.morphline.api.Record;
|
||||||
|
import org.kitesdk.morphline.base.Fields;
|
||||||
|
import org.kitesdk.morphline.base.Notifications;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
|
||||||
|
@ -39,9 +43,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
|
||||||
import org.kitesdk.morphline.api.Record;
|
|
||||||
import org.kitesdk.morphline.base.Fields;
|
|
||||||
import org.kitesdk.morphline.base.Notifications;
|
|
||||||
|
|
||||||
@ThreadLeakAction({Action.WARN})
|
@ThreadLeakAction({Action.WARN})
|
||||||
@ThreadLeakLingering(linger = 0)
|
@ThreadLeakLingering(linger = 0)
|
||||||
|
@ -64,7 +65,7 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
|
||||||
|
|
||||||
createAlias("aliascollection", "collection1");
|
createAlias("aliascollection", "collection1");
|
||||||
|
|
||||||
morphline = parse("test-morphlines/loadSolrBasic", "aliascollection");
|
morphline = parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection");
|
||||||
Record record = new Record();
|
Record record = new Record();
|
||||||
record.put(Fields.ID, "id0-innsbruck");
|
record.put(Fields.ID, "id0-innsbruck");
|
||||||
record.put("text", "mytext");
|
record.put("text", "mytext");
|
||||||
|
@ -113,7 +114,7 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
|
||||||
createAlias("aliascollection", "collection1,collection2");
|
createAlias("aliascollection", "collection1,collection2");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
parse("test-morphlines/loadSolrBasic", "aliascollection");
|
parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection");
|
||||||
fail("Expected IAE because update alias maps to multiple collections");
|
fail("Expected IAE because update alias maps to multiple collections");
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,9 @@ import org.apache.solr.client.solrj.SolrQuery;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
import org.kitesdk.morphline.api.Record;
|
||||||
|
import org.kitesdk.morphline.base.Fields;
|
||||||
|
import org.kitesdk.morphline.base.Notifications;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
|
||||||
|
@ -43,9 +46,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
|
||||||
import org.kitesdk.morphline.api.Record;
|
import com.google.common.base.Joiner;
|
||||||
import org.kitesdk.morphline.base.Fields;
|
|
||||||
import org.kitesdk.morphline.base.Notifications;
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.io.Files;
|
import com.google.common.io.Files;
|
||||||
|
|
||||||
|
@ -65,12 +66,13 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void doTest() throws Exception {
|
public void doTest() throws Exception {
|
||||||
File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
|
Joiner joiner = Joiner.on(File.separator);
|
||||||
|
File file = new File(joiner.join(RESOURCES_DIR, "test-documents", "sample-statuses-20120906-141433-medium.avro"));
|
||||||
|
|
||||||
waitForRecoveriesToFinish(false);
|
waitForRecoveriesToFinish(false);
|
||||||
|
|
||||||
// load avro records via morphline and zk into solr
|
// load avro records via morphline and zk into solr
|
||||||
morphline = parse("test-morphlines/tutorialReadAvroContainer");
|
morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer");
|
||||||
Record record = new Record();
|
Record record = new Record();
|
||||||
byte[] body = Files.toByteArray(file);
|
byte[] body = Files.toByteArray(file);
|
||||||
record.put(Fields.ATTACHMENT_BODY, body);
|
record.put(Fields.ATTACHMENT_BODY, body);
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.morphlines.solr;
|
package org.apache.solr.morphlines.solr;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.lucene.util.Constants;
|
import org.apache.lucene.util.Constants;
|
||||||
|
@ -25,6 +26,9 @@ import org.apache.solr.client.solrj.SolrQuery;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
import org.kitesdk.morphline.api.Record;
|
||||||
|
import org.kitesdk.morphline.base.Fields;
|
||||||
|
import org.kitesdk.morphline.base.Notifications;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
|
||||||
|
@ -33,9 +37,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
|
||||||
import org.kitesdk.morphline.api.Record;
|
|
||||||
import org.kitesdk.morphline.base.Fields;
|
|
||||||
import org.kitesdk.morphline.base.Notifications;
|
|
||||||
|
|
||||||
@ThreadLeakAction({Action.WARN})
|
@ThreadLeakAction({Action.WARN})
|
||||||
@ThreadLeakLingering(linger = 0)
|
@ThreadLeakLingering(linger = 0)
|
||||||
|
@ -56,7 +57,7 @@ public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
|
||||||
|
|
||||||
waitForRecoveriesToFinish(false);
|
waitForRecoveriesToFinish(false);
|
||||||
|
|
||||||
morphline = parse("test-morphlines/loadSolrBasic");
|
morphline = parse("test-morphlines" + File.separator + "loadSolrBasic");
|
||||||
Record record = new Record();
|
Record record = new Record();
|
||||||
record.put(Fields.ID, "id0-innsbruck");
|
record.put(Fields.ID, "id0-innsbruck");
|
||||||
record.put("text", "mytext");
|
record.put("text", "mytext");
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
ab317793098d82d2a4259b5fe80bcde9ef745d5d
|
|
|
@ -0,0 +1 @@
|
||||||
|
ac24f3b61248bd7527ba5d0ac55ab4a6c1e99dec
|
|
@ -1 +0,0 @@
|
||||||
295045584531070ba9770db1fbd7bdb1518ece4e
|
|
|
@ -1 +0,0 @@
|
||||||
a1752aeef0a9f66840b7c1a33613ac9d9b07052b
|
|
|
@ -0,0 +1 @@
|
||||||
|
13a473a5f3fc4b9c0cb6348313c9595219a5593b
|
|
@ -0,0 +1 @@
|
||||||
|
6dfd4325dcd202e24104ff1eb604fcf4322b917c
|
|
@ -1 +0,0 @@
|
||||||
4dc6c186534f8f3e9555b050e20066a58159e880
|
|
|
@ -0,0 +1 @@
|
||||||
|
16d6b1b848e7f3de2f4de79bbe12ef02ca29ad7c
|
|
@ -1 +0,0 @@
|
||||||
d672b83e1e779b2c967aa457baab89e1b544c18e
|
|
|
@ -0,0 +1 @@
|
||||||
|
c9ebdc525368e809f705495aff50ad1a21725a07
|
|
@ -1 +0,0 @@
|
||||||
16c27edf447563b5674138a2fcede66298e9b8f9
|
|
|
@ -0,0 +1 @@
|
||||||
|
296ffafea1f5160e1db56e6daa6dc86be7a9d8b4
|
|
@ -1 +0,0 @@
|
||||||
aee7a5f61f66e1e39ff0ade73c07ab68c2bcd9b6
|
|
|
@ -0,0 +1 @@
|
||||||
|
9861e7400a27214ece16cb94cb6637eef3284a21
|
|
@ -1 +0,0 @@
|
||||||
36a014a433ca59935f0eb1ea7ace3111a3bd20bf
|
|
|
@ -0,0 +1 @@
|
||||||
|
d21a33105ce6df0fd519c8fb1882549dbafff21e
|
|
@ -1 +0,0 @@
|
||||||
d967d2e4e6517e87316a24e1931243436bbc1ba0
|
|
|
@ -0,0 +1 @@
|
||||||
|
299e78d3fbb8e61c950d0a589fd31689753997c4
|
Loading…
Reference in New Issue