mirror of https://github.com/apache/lucene.git
SOLR-8477: Let users choose compression mode in SchemaCodecFactory
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1723427 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8ac7d3fce5
commit
c87b01af02
|
@ -273,6 +273,8 @@ New Features
|
|||
* SOLR-8470: Make TTL of PKIAuthenticationPlugin's tokens configurable through a system property
|
||||
(pkiauth.ttl) (noble)
|
||||
|
||||
* SOLR-8477: Let users choose compression mode in SchemaCodecFactory (Tomás Fernández Löbbe)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -1,12 +1,21 @@
|
|||
package org.apache.solr.core;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -29,12 +38,28 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
* Per-field CodecFactory implementation, extends Lucene's
|
||||
* and returns postings format implementations according to the
|
||||
* schema configuration.
|
||||
* <br>
|
||||
* Also, a string argument with name <code>compressionMode</code> can be
|
||||
* provided to chose between the different compression options for
|
||||
* stored fields
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
|
||||
|
||||
/**
|
||||
* Key to use in init arguments to set the compression mode in the codec.
|
||||
*/
|
||||
public static final String COMPRESSION_MODE = "compressionMode";
|
||||
|
||||
public static final Mode SOLR_DEFAULT_COMPRESSION_MODE = Mode.BEST_SPEED;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private Codec codec;
|
||||
private volatile SolrCore core;
|
||||
|
||||
|
||||
// TODO: we need to change how solr does this?
|
||||
// rather than a string like "Direct" you need to be able to pass parameters
|
||||
// and everything to a field in the schema, e.g. we should provide factories for
|
||||
|
@ -51,7 +76,23 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
|
|||
@Override
|
||||
public void init(NamedList args) {
|
||||
super.init(args);
|
||||
codec = new Lucene60Codec() {
|
||||
assert codec == null;
|
||||
String compressionModeStr = (String)args.get(COMPRESSION_MODE);
|
||||
Mode compressionMode;
|
||||
if (compressionModeStr != null) {
|
||||
try {
|
||||
compressionMode = Mode.valueOf(compressionModeStr.toUpperCase(Locale.ROOT));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Invalid compressionMode: '" + compressionModeStr +
|
||||
"'. Value must be one of " + Arrays.toString(Mode.values()));
|
||||
}
|
||||
log.info("Using compressionMode: " + compressionMode);
|
||||
} else {
|
||||
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
|
||||
log.info("Using default compressionMode: " + compressionMode);
|
||||
}
|
||||
codec = new Lucene60Codec(compressionMode) {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
|
||||
|
|
|
@ -25,6 +25,17 @@
|
|||
<fieldType name="string_memory" class="solr.StrField" docValuesFormat="Memory" />
|
||||
|
||||
<fieldType name="string" class="solr.StrField" />
|
||||
|
||||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
</types>
|
||||
<fields>
|
||||
|
@ -36,6 +47,7 @@
|
|||
<field name="string_memory_f" type="string_memory" indexed="false" stored="false" docValues="true" default="" />
|
||||
|
||||
<field name="string_f" type="string" indexed="true" stored="true" docValues="true" required="true"/>
|
||||
<field name="text" type="text_general" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="*_simple" type="string_simpletext" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_direct" type="string_direct" indexed="true" stored="true"/>
|
||||
|
|
|
@ -22,5 +22,7 @@
|
|||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
||||
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||
<codecFactory class="solr.SchemaCodecFactory">
|
||||
<str name="compressionMode">${tests.COMPRESSION_MODE:BEST_COMPRESSION}</str>
|
||||
</codecFactory>
|
||||
</config>
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<config>
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
|
||||
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||
</config>
|
|
@ -17,14 +17,26 @@ package org.apache.solr.core;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.IndexSchemaFactory;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.util.TestHarness;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestCodecSupport extends SolrTestCaseJ4 {
|
||||
|
@ -84,4 +96,145 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
|
|||
assertEquals("Memory", format.getDocValuesFormatForField("foo_memory").getName());
|
||||
assertEquals("Memory", format.getDocValuesFormatForField("bar_memory").getName());
|
||||
}
|
||||
|
||||
private void reloadCoreAndRecreateIndex() {
|
||||
h.getCoreContainer().reload(h.coreName);
|
||||
assertU(delQ("*:*"));
|
||||
assertU(commit());
|
||||
assertU(add(doc("string_f", "foo")));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
private void doTestCompressionMode(String propertyValue, String expectedModeString) throws IOException {
|
||||
if (propertyValue != null) {
|
||||
System.setProperty("tests.COMPRESSION_MODE", propertyValue);
|
||||
}
|
||||
try {
|
||||
reloadCoreAndRecreateIndex();
|
||||
assertCompressionMode(expectedModeString, h.getCore());
|
||||
} finally {
|
||||
System.clearProperty("tests.COMPRESSION_MODE");
|
||||
}
|
||||
}
|
||||
|
||||
protected void assertCompressionMode(String expectedModeString, SolrCore core) throws IOException {
|
||||
RefCounted<SolrIndexSearcher> ref = null;
|
||||
SolrIndexSearcher searcher = null;
|
||||
try {
|
||||
ref = core.getSearcher();
|
||||
searcher = ref.get();
|
||||
SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
|
||||
SegmentInfo info = infos.info(infos.size() - 1).info;
|
||||
assertEquals("Expecting compression mode string to be " + expectedModeString +
|
||||
" but got: " + info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY) +
|
||||
"\n SegmentInfo: " + info +
|
||||
"\n SegmentInfos: " + infos +
|
||||
"\n Codec: " + core.getCodec(),
|
||||
expectedModeString, info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY));
|
||||
} finally {
|
||||
if (ref != null) ref.decref();
|
||||
}
|
||||
}
|
||||
|
||||
public void testCompressionMode() throws Exception {
|
||||
assertEquals("incompatible change in compressionMode property",
|
||||
"compressionMode", SchemaCodecFactory.COMPRESSION_MODE);
|
||||
doTestCompressionMode("BEST_SPEED", "BEST_SPEED");
|
||||
doTestCompressionMode("BEST_COMPRESSION", "BEST_COMPRESSION");
|
||||
doTestCompressionMode("best_speed", "BEST_SPEED");
|
||||
doTestCompressionMode("best_compression", "BEST_COMPRESSION");
|
||||
}
|
||||
|
||||
public void testMixedCompressionMode() throws Exception {
|
||||
assertU(delQ("*:*"));
|
||||
assertU(commit());
|
||||
System.setProperty("tests.COMPRESSION_MODE", "BEST_SPEED");
|
||||
h.getCoreContainer().reload(h.coreName);
|
||||
assertU(add(doc("string_f", "1", "text", "foo bar")));
|
||||
assertU(commit());
|
||||
assertCompressionMode("BEST_SPEED", h.getCore());
|
||||
System.setProperty("tests.COMPRESSION_MODE", "BEST_COMPRESSION");
|
||||
h.getCoreContainer().reload(h.coreName);
|
||||
assertU(add(doc("string_f", "2", "text", "foo zar")));
|
||||
assertU(commit());
|
||||
assertCompressionMode("BEST_COMPRESSION", h.getCore());
|
||||
System.setProperty("tests.COMPRESSION_MODE", "BEST_SPEED");
|
||||
h.getCoreContainer().reload(h.coreName);
|
||||
assertU(add(doc("string_f", "3", "text", "foo zoo")));
|
||||
assertU(commit());
|
||||
assertCompressionMode("BEST_SPEED", h.getCore());
|
||||
assertQ(req("q", "*:*"),
|
||||
"//*[@numFound='3']");
|
||||
assertQ(req("q", "text:foo"),
|
||||
"//*[@numFound='3']");
|
||||
assertU(optimize());
|
||||
assertCompressionMode("BEST_SPEED", h.getCore());
|
||||
System.clearProperty("tests.COMPRESSION_MODE");
|
||||
}
|
||||
|
||||
public void testBadCompressionMode() throws Exception {
|
||||
try {
|
||||
doTestCompressionMode("something_that_doesnt_exist", "something_that_doesnt_exist");
|
||||
fail("Expecting exception");
|
||||
} catch (SolrException e) {
|
||||
assertEquals(SolrException.ErrorCode.SERVER_ERROR.code, e.code());
|
||||
assertTrue("Unexpected Exception message: " + e.getMessage(),
|
||||
e.getMessage().contains("Unable to reload core"));
|
||||
}
|
||||
|
||||
SchemaCodecFactory factory = new SchemaCodecFactory();
|
||||
NamedList<String> nl = new NamedList<>();
|
||||
nl.add(SchemaCodecFactory.COMPRESSION_MODE, "something_that_doesnt_exist");
|
||||
try {
|
||||
factory.init(nl);
|
||||
fail("Expecting exception");
|
||||
} catch (SolrException e) {
|
||||
assertEquals(SolrException.ErrorCode.SERVER_ERROR.code, e.code());
|
||||
assertTrue("Unexpected Exception message: " + e.getMessage(),
|
||||
e.getMessage().contains("Invalid compressionMode: 'something_that_doesnt_exist'"));
|
||||
}
|
||||
|
||||
factory = new SchemaCodecFactory();
|
||||
nl = new NamedList<>();
|
||||
nl.add(SchemaCodecFactory.COMPRESSION_MODE, "");
|
||||
try {
|
||||
factory.init(nl);
|
||||
fail("Expecting exception");
|
||||
} catch (SolrException e) {
|
||||
assertEquals(SolrException.ErrorCode.SERVER_ERROR.code, e.code());
|
||||
assertTrue("Unexpected Exception message: " + e.getMessage(),
|
||||
e.getMessage().contains("Invalid compressionMode: ''"));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testCompressionModeDefault() throws IOException {
|
||||
assertEquals("Default Solr compression mode changed. Is this expected?",
|
||||
SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE, Mode.valueOf("BEST_SPEED"));
|
||||
|
||||
String previousCoreName = h.coreName;
|
||||
String newCoreName = "core_with_default_compression";
|
||||
SolrCore c = null;
|
||||
|
||||
SolrConfig config = TestHarness.createConfig(testSolrHome, previousCoreName, "solrconfig_codec2.xml");
|
||||
assertEquals("Unexpected codec factory for this test.", "solr.SchemaCodecFactory", config.get("codecFactory/@class"));
|
||||
assertNull("Unexpected configuration of codec factory for this test. Expecting empty element",
|
||||
config.getNode("codecFactory", false).getFirstChild());
|
||||
IndexSchema schema = IndexSchemaFactory.buildIndexSchema("schema_codec.xml", config);
|
||||
|
||||
try {
|
||||
c = new SolrCore(new CoreDescriptor(h.getCoreContainer(), newCoreName, testSolrHome.resolve(newCoreName)),
|
||||
new ConfigSet("fakeConfigset", config, schema, null));
|
||||
assertNull(h.getCoreContainer().registerCore(newCoreName, c, false));
|
||||
h.coreName = newCoreName;
|
||||
assertEquals("We are not using the correct core", "solrconfig_codec2.xml", h.getCore().getConfigResource());
|
||||
assertU(add(doc("string_f", "foo")));
|
||||
assertU(commit());
|
||||
assertCompressionMode(SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE.name(), h.getCore());
|
||||
} finally {
|
||||
h.coreName = previousCoreName;
|
||||
h.getCoreContainer().unload(newCoreName);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,6 +73,9 @@
|
|||
are experimental, so if you choose to customize the index format, it's a good
|
||||
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
|
||||
before upgrading to a newer version to avoid unnecessary reindexing.
|
||||
A "compressionMode" string element can be added to <codecFactory> to choose
|
||||
between the existing compression modes in the default codec: "BEST_SPEED" (default)
|
||||
or "BEST_COMPRESSION".
|
||||
-->
|
||||
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||
|
||||
|
|
|
@ -126,6 +126,9 @@
|
|||
are experimental, so if you choose to customize the index format, it's a good
|
||||
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
|
||||
before upgrading to a newer version to avoid unnecessary reindexing.
|
||||
A "compressionMode" string element can be added to <codecFactory> to choose
|
||||
between the existing compression modes in the default codec: "BEST_SPEED" (default)
|
||||
or "BEST_COMPRESSION".
|
||||
-->
|
||||
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||
|
||||
|
|
|
@ -127,6 +127,9 @@
|
|||
are experimental, so if you choose to customize the index format, it's a good
|
||||
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
|
||||
before upgrading to a newer version to avoid unnecessary reindexing.
|
||||
A "compressionMode" string element can be added to <codecFactory> to choose
|
||||
between the existing compression modes in the default codec: "BEST_SPEED" (default)
|
||||
or "BEST_COMPRESSION".
|
||||
-->
|
||||
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||
|
||||
|
|
Loading…
Reference in New Issue