SOLR-8477: Let users choose compression mode in SchemaCodecFactory

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1723427 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tomas Eduardo Fernandez Lobbe 2016-01-06 22:37:51 +00:00
parent 8ac7d3fce5
commit c87b01af02
9 changed files with 247 additions and 2 deletions

View File

@ -273,6 +273,8 @@ New Features
* SOLR-8470: Make TTL of PKIAuthenticationPlugin's tokens configurable through a system property
(pkiauth.ttl) (noble)
* SOLR-8477: Let users choose compression mode in SchemaCodecFactory (Tomás Fernández Löbbe)
Bug Fixes
----------------------

View File

@ -1,12 +1,21 @@
package org.apache.solr.core;
import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.Locale;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -29,12 +38,28 @@ import org.apache.solr.util.plugin.SolrCoreAware;
* Per-field CodecFactory implementation, extends Lucene's
* and returns postings format implementations according to the
* schema configuration.
* <br>
* Also, a string argument with name <code>compressionMode</code> can be
* provided to choose between the different compression options for
* stored fields.
*
* @lucene.experimental
*/
public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
/**
* Key to use in init arguments to set the compression mode in the codec.
*/
public static final String COMPRESSION_MODE = "compressionMode";
public static final Mode SOLR_DEFAULT_COMPRESSION_MODE = Mode.BEST_SPEED;
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private Codec codec;
private volatile SolrCore core;
// TODO: we need to change how solr does this?
// rather than a string like "Direct" you need to be able to pass parameters
// and everything to a field in the schema, e.g. we should provide factories for
@ -51,7 +76,23 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
@Override
public void init(NamedList args) {
super.init(args);
codec = new Lucene60Codec() {
assert codec == null;
String compressionModeStr = (String)args.get(COMPRESSION_MODE);
Mode compressionMode;
if (compressionModeStr != null) {
try {
compressionMode = Mode.valueOf(compressionModeStr.toUpperCase(Locale.ROOT));
} catch (IllegalArgumentException e) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Invalid compressionMode: '" + compressionModeStr +
"'. Value must be one of " + Arrays.toString(Mode.values()));
}
log.info("Using compressionMode: " + compressionMode);
} else {
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
log.info("Using default compressionMode: " + compressionMode);
}
codec = new Lucene60Codec(compressionMode) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);

View File

@ -25,6 +25,17 @@
<fieldType name="string_memory" class="solr.StrField" docValuesFormat="Memory" />
<fieldType name="string" class="solr.StrField" />
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
</types>
<fields>
@ -36,6 +47,7 @@
<field name="string_memory_f" type="string_memory" indexed="false" stored="false" docValues="true" default="" />
<field name="string_f" type="string" indexed="true" stored="true" docValues="true" required="true"/>
<field name="text" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_simple" type="string_simpletext" indexed="true" stored="true"/>
<dynamicField name="*_direct" type="string_direct" indexed="true" stored="true"/>

View File

@ -22,5 +22,7 @@
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
<codecFactory class="solr.SchemaCodecFactory"/>
<codecFactory class="solr.SchemaCodecFactory">
<str name="compressionMode">${tests.COMPRESSION_MODE:BEST_COMPRESSION}</str>
</codecFactory>
</config>

View File

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<!-- The codecFactory element is deliberately left without a "compressionMode" child:
     this config is used to check the compression mode SchemaCodecFactory picks when
     none is configured, and the test asserts that the element has no children. -->
<codecFactory class="solr.SchemaCodecFactory"/>
</config>

View File

@ -17,14 +17,26 @@ package org.apache.solr.core;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IndexSchemaFactory;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestHarness;
import org.junit.BeforeClass;
public class TestCodecSupport extends SolrTestCaseJ4 {
@ -84,4 +96,145 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
assertEquals("Memory", format.getDocValuesFormatForField("foo_memory").getName());
assertEquals("Memory", format.getDocValuesFormatForField("bar_memory").getName());
}
/**
 * Reloads the harness core, then empties the index and indexes a single
 * document, committing after each step.
 */
private void reloadCoreAndRecreateIndex() {
  final String coreToReload = h.coreName;
  h.getCoreContainer().reload(coreToReload);

  // Drop whatever earlier tests left behind.
  assertU(delQ("*:*"));
  assertU(commit());

  // Index one document and commit it.
  assertU(add(doc("string_f", "foo")));
  assertU(commit());
}
/**
 * Runs a single compression-mode scenario: optionally sets the
 * {@code tests.COMPRESSION_MODE} system property, reloads the core and
 * recreates the index, then checks the mode recorded on the last segment.
 * The property is always cleared afterwards.
 *
 * @param propertyValue value for {@code tests.COMPRESSION_MODE}, or
 *        {@code null} to leave the property untouched before the reload
 * @param expectedModeString mode string expected on the last segment
 * @throws IOException if reading the latest commit fails
 */
private void doTestCompressionMode(String propertyValue, String expectedModeString) throws IOException {
  final String modeProperty = "tests.COMPRESSION_MODE";
  if (null != propertyValue) {
    System.setProperty(modeProperty, propertyValue);
  }
  try {
    reloadCoreAndRecreateIndex();
    final SolrCore reloadedCore = h.getCore();
    assertCompressionMode(expectedModeString, reloadedCore);
  } finally {
    System.clearProperty(modeProperty);
  }
}
/**
 * Asserts that the last segment of the latest commit in {@code core}'s
 * index carries the expected stored-fields compression mode attribute.
 *
 * @param expectedModeString expected value of the
 *        {@code Lucene50StoredFieldsFormat.MODE_KEY} segment attribute
 * @param core core whose index is inspected
 * @throws IOException if the latest commit cannot be read
 */
protected void assertCompressionMode(String expectedModeString, SolrCore core) throws IOException {
  RefCounted<SolrIndexSearcher> searcherRef = null;
  try {
    searcherRef = core.getSearcher();
    final SolrIndexSearcher indexSearcher = searcherRef.get();
    final SegmentInfos commitInfos =
        SegmentInfos.readLatestCommit(indexSearcher.getIndexReader().directory());
    final SegmentInfo lastSegment = commitInfos.info(commitInfos.size() - 1).info;
    final String actualMode = lastSegment.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);

    final String failureMessage = "Expecting compression mode string to be " + expectedModeString
        + " but got: " + actualMode
        + "\n SegmentInfo: " + lastSegment
        + "\n SegmentInfos: " + commitInfos
        + "\n Codec: " + core.getCodec();
    assertEquals(failureMessage, expectedModeString, actualMode);
  } finally {
    // Release the searcher reference only if it was actually acquired.
    if (searcherRef != null) {
      searcherRef.decref();
    }
  }
}
/**
 * Checks the init-argument key and that both compression modes are
 * honoured, whether given in upper or lower case.
 */
public void testCompressionMode() throws Exception {
  assertEquals("incompatible change in compressionMode property",
      "compressionMode", SchemaCodecFactory.COMPRESSION_MODE);

  // Each row: {property value, expected mode string on the segment}.
  final String[][] scenarios = {
      {"BEST_SPEED", "BEST_SPEED"},
      {"BEST_COMPRESSION", "BEST_COMPRESSION"},
      {"best_speed", "BEST_SPEED"},
      {"best_compression", "BEST_COMPRESSION"},
  };
  for (String[] scenario : scenarios) {
    doTestCompressionMode(scenario[0], scenario[1]);
  }
}
/**
 * Indexes documents while switching the compression mode between core
 * reloads, checking after each commit that the last segment carries the
 * mode that was active, that all documents remain searchable, and that the
 * segment left after an optimize uses the mode active at that time.
 * <p>
 * The {@code tests.COMPRESSION_MODE} system property is cleared in a
 * {@code finally} block, as in {@code doTestCompressionMode}, so a failing
 * assertion or reload cannot leave it set.
 */
public void testMixedCompressionMode() throws Exception {
  assertU(delQ("*:*"));
  assertU(commit());
  try {
    System.setProperty("tests.COMPRESSION_MODE", "BEST_SPEED");
    h.getCoreContainer().reload(h.coreName);
    assertU(add(doc("string_f", "1", "text", "foo bar")));
    assertU(commit());
    assertCompressionMode("BEST_SPEED", h.getCore());

    System.setProperty("tests.COMPRESSION_MODE", "BEST_COMPRESSION");
    h.getCoreContainer().reload(h.coreName);
    assertU(add(doc("string_f", "2", "text", "foo zar")));
    assertU(commit());
    assertCompressionMode("BEST_COMPRESSION", h.getCore());

    System.setProperty("tests.COMPRESSION_MODE", "BEST_SPEED");
    h.getCoreContainer().reload(h.coreName);
    assertU(add(doc("string_f", "3", "text", "foo zoo")));
    assertU(commit());
    assertCompressionMode("BEST_SPEED", h.getCore());

    // Documents written under either mode must all be visible.
    assertQ(req("q", "*:*"),
        "//*[@numFound='3']");
    assertQ(req("q", "text:foo"),
        "//*[@numFound='3']");

    // After the optimize, the last segment must report the currently configured mode.
    assertU(optimize());
    assertCompressionMode("BEST_SPEED", h.getCore());
  } finally {
    System.clearProperty("tests.COMPRESSION_MODE");
  }
}
/**
 * Verifies that an unknown compression mode is rejected, both when it
 * arrives through a core reload and when it is passed directly to
 * {@code SchemaCodecFactory.init}.
 */
public void testBadCompressionMode() throws Exception {
  // Via the core: the reload itself is expected to fail.
  try {
    doTestCompressionMode("something_that_doesnt_exist", "something_that_doesnt_exist");
    fail("Expecting exception");
  } catch (SolrException reloadFailure) {
    assertEquals(SolrException.ErrorCode.SERVER_ERROR.code, reloadFailure.code());
    assertTrue("Unexpected Exception message: " + reloadFailure.getMessage(),
        reloadFailure.getMessage().contains("Unable to reload core"));
  }

  // Directly on the factory. Each row: {configured value, expected message fragment}.
  final String[][] invalidCases = {
      {"something_that_doesnt_exist", "Invalid compressionMode: 'something_that_doesnt_exist'"},
      {"", "Invalid compressionMode: ''"},
  };
  for (String[] invalidCase : invalidCases) {
    final SchemaCodecFactory freshFactory = new SchemaCodecFactory();
    final NamedList<String> initArgs = new NamedList<>();
    initArgs.add(SchemaCodecFactory.COMPRESSION_MODE, invalidCase[0]);
    try {
      freshFactory.init(initArgs);
      fail("Expecting exception");
    } catch (SolrException initFailure) {
      assertEquals(SolrException.ErrorCode.SERVER_ERROR.code, initFailure.code());
      assertTrue("Unexpected Exception message: " + initFailure.getMessage(),
          initFailure.getMessage().contains(invalidCase[1]));
    }
  }
}
/**
 * Checks that {@code SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE} is
 * still BEST_SPEED and that a core built from solrconfig_codec2.xml, whose
 * codecFactory element has no children, ends up with that default mode on
 * its last segment.
 * <p>
 * A temporary core is created and registered for the duration of the test;
 * the harness is pointed at it and switched back in the finally block.
 */
public void testCompressionModeDefault() throws IOException {
assertEquals("Default Solr compression mode changed. Is this expected?",
SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE, Mode.valueOf("BEST_SPEED"));
String previousCoreName = h.coreName;
String newCoreName = "core_with_default_compression";
SolrCore c = null;
SolrConfig config = TestHarness.createConfig(testSolrHome, previousCoreName, "solrconfig_codec2.xml");
// Guard the test's premise: the factory must be SchemaCodecFactory with no configuration at all.
assertEquals("Unexpected codec factory for this test.", "solr.SchemaCodecFactory", config.get("codecFactory/@class"));
assertNull("Unexpected configuration of codec factory for this test. Expecting empty element",
config.getNode("codecFactory", false).getFirstChild());
IndexSchema schema = IndexSchemaFactory.buildIndexSchema("schema_codec.xml", config);
try {
c = new SolrCore(new CoreDescriptor(h.getCoreContainer(), newCoreName, testSolrHome.resolve(newCoreName)),
new ConfigSet("fakeConfigset", config, schema, null));
// registerCore is expected to return null here (presumably meaning no core was previously registered under this name; confirm against CoreContainer).
assertNull(h.getCoreContainer().registerCore(newCoreName, c, false));
// Point the harness at the new core so assertU/h.getCore() operate on it.
h.coreName = newCoreName;
assertEquals("We are not using the correct core", "solrconfig_codec2.xml", h.getCore().getConfigResource());
assertU(add(doc("string_f", "foo")));
assertU(commit());
assertCompressionMode(SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE.name(), h.getCore());
} finally {
// Restore the harness to the original core before unloading the temporary one.
h.coreName = previousCoreName;
h.getCoreContainer().unload(newCoreName);
}
}
}

View File

@ -73,6 +73,9 @@
are experimental, so if you choose to customize the index format, it's a good
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
before upgrading to a newer version to avoid unnecessary reindexing.
A "compressionMode" string element can be added to <codecFactory> to choose
between the existing compression modes in the default codec: "BEST_SPEED" (default)
or "BEST_COMPRESSION".
-->
<codecFactory class="solr.SchemaCodecFactory"/>

View File

@ -126,6 +126,9 @@
are experimental, so if you choose to customize the index format, it's a good
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
before upgrading to a newer version to avoid unnecessary reindexing.
A "compressionMode" string element can be added to <codecFactory> to choose
between the existing compression modes in the default codec: "BEST_SPEED" (default)
or "BEST_COMPRESSION".
-->
<codecFactory class="solr.SchemaCodecFactory"/>

View File

@ -127,6 +127,9 @@
are experimental, so if you choose to customize the index format, it's a good
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
before upgrading to a newer version to avoid unnecessary reindexing.
A "compressionMode" string element can be added to <codecFactory> to choose
between the existing compression modes in the default codec: "BEST_SPEED" (default)
or "BEST_COMPRESSION".
-->
<codecFactory class="solr.SchemaCodecFactory"/>