Fix security problems in mapper-attachments.

Many of the tests were not running, or did not check the exceptions.
I renamed all tests to meet *Tests* so they run, and assert exception messages.

Also because we must (currently) invoke tika with additional privileges, I added
the security logic, and fixed unit testing to call our static method directly.

This must be package private for security reasons, i simply put everything in
org.elasticsearch.mapper.attachments package.

I upgraded tika to the latest, so we are up to date, and removed logic around
tika == null and old locale issues.
This commit is contained in:
Robert Muir 2015-11-07 17:45:16 -05:00
parent cdab360992
commit 3c40ed22b3
18 changed files with 106 additions and 187 deletions

2
.gitignore vendored
View File

@ -13,5 +13,7 @@
/.local-*-execution-hints.log
/eclipse-build/
build/
**/.local*
generated-resources/
.gradle/
/bin/

View File

@ -38,7 +38,7 @@ apply plugin: 'elasticsearch.esplugin'
esplugin {
description 'The mapper attachments plugin adds the attachment type to Elasticsearch using Apache Tika.'
classname 'org.elasticsearch.plugin.mapper.attachments.MapperAttachmentsPlugin'
classname 'org.elasticsearch.mapper.attachments.MapperAttachmentsPlugin'
}
group = 'org.elasticsearch.plugin'
@ -60,7 +60,7 @@ repositories {
}
dependencies {
compile('org.apache.tika:tika-parsers:1.10') {
compile('org.apache.tika:tika-parsers:1.11') {
// TODO: is all of edu.ucar incompatibile with apache2 license? if so, we can exclude the group?
// Not Apache2 License compatible
exclude group: 'edu.ucar', module: 'netcdf'

View File

@ -17,32 +17,34 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment;
package org.elasticsearch.mapper.attachments;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.Constants;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.*;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.*;
import static org.elasticsearch.index.mapper.MapperBuilders.*;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parsePathType;
import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika;
/**
* <pre>
@ -482,21 +484,12 @@ public class AttachmentMapper extends FieldMapper {
String parsedContent;
try {
Tika tika = tika();
if (tika == null) {
if (!ignoreErrors) {
throw new MapperParsingException("Tika can not be initialized with the current Locale [" +
Locale.getDefault().getLanguage() + "] on the current JVM [" +
Constants.JAVA_VERSION + "]");
} else {
logger.warn("Tika can not be initialized with the current Locale [{}] on the current JVM [{}]",
Locale.getDefault().getLanguage(), Constants.JAVA_VERSION);
return null;
}
}
// Set the maximum length of strings returned by the parseToString method, -1 sets no limit
parsedContent = tika.parseToString(StreamInput.wrap(content), metadata, indexedChars);
parsedContent = parseWithTika(content, metadata, indexedChars);
} catch (Throwable e) {
// unbox checked exception
if (e instanceof PrivilegedActionException) {
e = e.getCause();
}
// #18: we could ignore errors when Tika does not parse data
if (!ignoreErrors) {
logger.trace("exception caught", e);
@ -614,6 +607,40 @@ public class AttachmentMapper extends FieldMapper {
return null;
}
// singleton
private static final Tika TIKA_INSTANCE = new Tika();
/**
* parses with tika, throwing any exception hit while parsing the document
*/
// only package private for testing!
static String parseWithTika(final byte content[], final Metadata metadata, final int limit) throws TikaException, IOException {
// check that its not unprivileged code like a script
SecurityManager sm = System.getSecurityManager();
if (sm != null) {
sm.checkPermission(new SpecialPermission());
}
try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<String>() {
@Override
public String run() throws TikaException, IOException {
return TIKA_INSTANCE.parseToString(StreamInput.wrap(content), metadata, limit);
}
});
} catch (PrivilegedActionException e) {
// checked exception from tika: unbox it
Throwable cause = e.getCause();
if (cause instanceof TikaException) {
throw (TikaException) cause;
} else if (cause instanceof IOException) {
throw (IOException) cause;
} else {
throw new AssertionError(cause);
}
}
}
@Override
protected void parseCreateField(ParseContext parseContext, List<Field> fields) throws IOException {

View File

@ -17,10 +17,9 @@
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.plugins.Plugin;
public class MapperAttachmentsPlugin extends Plugin {

View File

@ -1,53 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments.tika;
import org.apache.lucene.util.Constants;
import java.util.StringTokenizer;
import static java.lang.Integer.parseInt;
public class LocaleChecker {
public static int JVM_MAJOR_VERSION = 0;
public static int JVM_MINOR_VERSION = 0;
public static int JVM_PATCH_MAJOR_VERSION = 0;
public static int JVM_PATCH_MINOR_VERSION = 0;
static {
StringTokenizer st = new StringTokenizer(Constants.JVM_SPEC_VERSION, ".");
JVM_MAJOR_VERSION = parseInt(st.nextToken());
if(st.hasMoreTokens()) {
JVM_MINOR_VERSION = parseInt(st.nextToken());
}
if(st.hasMoreTokens()) {
StringTokenizer stPatch = new StringTokenizer(st.nextToken(), "_");
JVM_PATCH_MAJOR_VERSION = parseInt(stPatch.nextToken());
JVM_PATCH_MINOR_VERSION = parseInt(stPatch.nextToken());
}
}
/**
* Tika 1.8 fixed currently known Locale issues with some JVMs
*/
public static boolean isLocaleCompatible() {
return true;
}
}

View File

@ -17,29 +17,10 @@
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments.tika;
import org.apache.tika.Tika;
import static org.elasticsearch.plugin.mapper.attachments.tika.LocaleChecker.isLocaleCompatible;
/**
*
*/
public class TikaInstance {
private static final Tika tika;
static {
if (isLocaleCompatible()) {
tika = new Tika();
} else {
tika = null;
}
}
public static Tika tika() {
return tika;
}
}
grant {
// TODO: fix tika not to actually install bouncy castle like this
permission java.security.SecurityPermission "putProviderProperty.BC";
permission java.security.SecurityPermission "insertProvider";
// TODO: fix POI XWPF to not do this
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};

View File

@ -17,26 +17,15 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;
import org.junit.BeforeClass;
import static org.elasticsearch.index.mapper.attachment.test.MapperTestUtils.assumeCorrectLocale;
public class AttachmentUnitTestCase extends ESTestCase {
/**
* We can have issues with some JVMs and Locale
* See https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/105
*/
@BeforeClass
public static void checkLocale() {
assumeCorrectLocale();
}
protected Settings testSettings;

View File

@ -17,16 +17,14 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import org.junit.Before;
import org.junit.Test;
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.hamcrest.Matchers.instanceOf;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
@ -25,9 +25,7 @@ import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.junit.Test;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import java.io.IOException;
@ -41,7 +39,7 @@ import static org.hamcrest.Matchers.*;
* Note that we have converted /org/elasticsearch/index/mapper/xcontent/testContentLength.txt
* to a /org/elasticsearch/index/mapper/xcontent/encrypted.pdf with password `12345678`.
*/
public class EncryptedDocMapperTest extends AttachmentUnitTestCase {
public class EncryptedDocMapperTests extends AttachmentUnitTestCase {
public void testMultipleDocsEncryptedLast() throws IOException {
DocumentMapperParser mapperParser = MapperTestUtils.newMapperService(createTempDir(), Settings.EMPTY).documentMapperParser();
@ -127,8 +125,10 @@ public class EncryptedDocMapperTest extends AttachmentUnitTestCase {
docMapper.parse("person", "person", "1", json);
fail("Expected doc parsing exception");
} catch (MapperParsingException e) {
// TODO: check the error message...getting security problems atm
//assertTrue(e.getMessage(), e.getMessage().contains())
if (e.getMessage() == null || e.getMessage().contains("is encrypted") == false) {
// wrong exception
throw e;
}
}
}

View File

@ -17,18 +17,16 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;

View File

@ -17,12 +17,12 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.integration;
package org.elasticsearch.mapper.attachments;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugin.mapper.attachments.MapperAttachmentsPlugin;
import org.elasticsearch.mapper.attachments.MapperAttachmentsPlugin;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;

View File

@ -17,9 +17,8 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test;
package org.elasticsearch.mapper.attachments;
import org.apache.lucene.util.Constants;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
@ -35,12 +34,8 @@ import org.elasticsearch.test.IndexSettingsModule;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.Locale;
import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue;
import static org.elasticsearch.plugin.mapper.attachments.tika.LocaleChecker.isLocaleCompatible;
public class MapperTestUtils {
class MapperTestUtils {
public static MapperService newMapperService(Path tempDir, Settings indexSettings) throws IOException {
Settings nodeSettings = Settings.builder()
@ -55,14 +50,4 @@ public class MapperTestUtils {
SimilarityService similarityService = new SimilarityService(idxSettings, Collections.emptyMap());
return new MapperService(idxSettings, analysisService, similarityService);
}
/**
* We can have issues with some JVMs and Locale
* See https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/105
*/
public static void assumeCorrectLocale() {
assumeTrue("Current Locale language " + Locale.getDefault().getLanguage() +" could cause an error with Java " +
Constants.JAVA_VERSION, isLocaleCompatible());
}
}

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
@ -25,9 +25,7 @@ import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.junit.Test;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import java.io.IOException;
@ -39,7 +37,7 @@ import static org.hamcrest.Matchers.*;
/**
* Test for https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/38
*/
public class MetadataMapperTest extends AttachmentUnitTestCase {
public class MetadataMapperTests extends AttachmentUnitTestCase {
protected void checkMeta(String filename, Settings otherSettings, Long expectedDate, Long expectedLength) throws IOException {
Settings settings = Settings.builder()
@ -95,8 +93,8 @@ public class MetadataMapperTest extends AttachmentUnitTestCase {
public void testWithEmptyDate() throws Exception {
try {
checkMeta("htmlWithEmptyDateMeta.html", Settings.builder().put("index.mapping.attachment.ignore_errors", false).build(), null, null);
} catch (MapperParsingException e) {
throw e;
} catch (MapperParsingException expected) {
assertTrue(expected.getMessage().contains("failed to parse"));
}
}

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.common.Base64;
import org.elasticsearch.common.settings.Settings;
@ -26,14 +26,12 @@ import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.nio.charset.StandardCharsets;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
@ -26,8 +26,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.test.StreamsUtils.copyToBytesFromClasspath;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.standalone;
package org.elasticsearch.mapper.attachments;
import org.apache.commons.cli.CommandLine;
import org.elasticsearch.common.bytes.BytesReference;
@ -32,8 +32,7 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import java.io.FileNotFoundException;
import java.io.IOException;

View File

@ -17,25 +17,23 @@
* under the License.
*/
package org.elasticsearch.index.mapper.attachment.test.unit;
package org.elasticsearch.mapper.attachments;
import org.apache.tika.Tika;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.attachment.test.MapperTestUtils;
import org.elasticsearch.mapper.attachments.AttachmentMapper;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.mapper.attachment.AttachmentMapper.FieldNames.*;
import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika;
import static org.elasticsearch.mapper.attachments.AttachmentMapper.FieldNames.*;
import static org.elasticsearch.test.StreamsUtils.copyToBytesFromClasspath;
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.hamcrest.Matchers.isEmptyOrNullString;
@ -44,7 +42,7 @@ import static org.hamcrest.Matchers.not;
/**
* Test for different documents
*/
public class VariousDocTest extends AttachmentUnitTestCase {
public class VariousDocTests extends AttachmentUnitTestCase {
protected DocumentMapper docMapper;
@ -69,7 +67,7 @@ public class VariousDocTest extends AttachmentUnitTestCase {
* Test for encrypted PDF
*/
public void testEncryptedPDFDocument() throws Exception {
assertException("encrypted.pdf");
assertException("encrypted.pdf", "is encrypted");
// TODO Remove when this will be fixed in Tika. See https://issues.apache.org/jira/browse/TIKA-1548
System.clearProperty("sun.font.fontmanager");
testMapper("encrypted.pdf", true);
@ -108,24 +106,25 @@ public class VariousDocTest extends AttachmentUnitTestCase {
testMapper("asciidoc.asciidoc", false);
}
void assertException(String filename) {
Tika tika = tika();
assumeTrue("Tika has been disabled. Ignoring test...", tika != null);
try (InputStream is = VariousDocTest.class.getResourceAsStream("/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename)) {
tika.parseToString(is);
void assertException(String filename, String expectedMessage) throws Exception {
try (InputStream is = VariousDocTests.class.getResourceAsStream("/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename)) {
byte bytes[] = IOUtils.toByteArray(is);
AttachmentMapper.parseWithTika(bytes, new Metadata(), -1);
fail("expected exception");
} catch (Exception e) {
// expected. TODO: check message
if (e.getMessage() != null && e.getMessage().contains(expectedMessage)) {
// ok
} else {
// unexpected
throw e;
}
}
}
protected void assertParseable(String filename) throws Exception {
Tika tika = tika();
assumeTrue("Tika has been disabled. Ignoring test...", tika != null);
try (InputStream is = VariousDocTest.class.getResourceAsStream("/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename)) {
String parsedContent = tika.parseToString(is);
try (InputStream is = VariousDocTests.class.getResourceAsStream("/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename)) {
byte bytes[] = IOUtils.toByteArray(is);
String parsedContent = AttachmentMapper.parseWithTika(bytes, new Metadata(), -1);
assertThat(parsedContent, not(isEmptyOrNullString()));
logger.debug("extracted content: {}", parsedContent);
}

View File

@ -10,5 +10,5 @@
- do:
nodes.info: {}
- match: { nodes.$master.plugins.0.name: mapper-attachments }
- match: { nodes.$master.plugins.0.name: elasticsearch-mapper-attachments }
- match: { nodes.$master.plugins.0.jvm: true }