mirror of https://github.com/apache/lucene.git
SOLR-10046: Add UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)
This commit is contained in:
parent
cce7ba9b2f
commit
0170df93f3
|
@ -185,6 +185,8 @@ New Features
|
|||
|
||||
* SOLR-10224: Add disk total and disk free metrics. (ab)
|
||||
|
||||
* SOLR-10046: Add UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,218 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.CodecReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FilterCodecReader;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.OneMergeWrappingMergePolicy;
|
||||
import org.apache.lucene.index.SegmentCommitInfo;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* A merge policy that can detect schema changes and write docvalues into merging segments when a field has docvalues enabled
|
||||
* Using UninvertingReader.
|
||||
*
|
||||
* This merge policy will delegate to the wrapped merge policy for selecting merge segments
|
||||
*
|
||||
*/
|
||||
public class UninvertDocValuesMergePolicyFactory extends WrapperMergePolicyFactory {
|
||||
|
||||
final private boolean skipIntegrityCheck;
|
||||
|
||||
/**
|
||||
* Whether or not the wrapped docValues producer should check consistency
|
||||
*/
|
||||
public boolean getSkipIntegrityCheck() {
|
||||
return skipIntegrityCheck;
|
||||
}
|
||||
|
||||
public UninvertDocValuesMergePolicyFactory(SolrResourceLoader resourceLoader, MergePolicyFactoryArgs args, IndexSchema schema) {
|
||||
super(resourceLoader, args, schema);
|
||||
final Boolean sic = (Boolean)args.remove("skipIntegrityCheck");
|
||||
if (sic != null) {
|
||||
this.skipIntegrityCheck = sic.booleanValue();
|
||||
} else {
|
||||
this.skipIntegrityCheck = false;
|
||||
}
|
||||
if (!args.keys().isEmpty()) {
|
||||
throw new IllegalArgumentException("Arguments were "+args+" but "+getClass().getSimpleName()+" takes no arguments.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MergePolicy getMergePolicyInstance(MergePolicy wrappedMP) {
|
||||
return new OneMergeWrappingMergePolicy(wrappedMP, (merge) -> new UninvertDocValuesOneMerge(merge.segments));
|
||||
}
|
||||
|
||||
private UninvertingReader.Type getUninversionType(FieldInfo fi) {
|
||||
SchemaField sf = schema.getFieldOrNull(fi.name);
|
||||
|
||||
if (null != sf &&
|
||||
sf.hasDocValues() &&
|
||||
fi.getDocValuesType() == DocValuesType.NONE &&
|
||||
fi.getIndexOptions() != IndexOptions.NONE) {
|
||||
return sf.getType().getUninversionType(sf);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private class UninvertDocValuesOneMerge extends MergePolicy.OneMerge {
|
||||
|
||||
public UninvertDocValuesOneMerge(List<SegmentCommitInfo> segments) {
|
||||
super(segments);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CodecReader wrapForMerge(CodecReader reader) throws IOException {
|
||||
// Wrap the reader with an uninverting reader if any of the fields have no docvalues but the
|
||||
// Schema says there should be
|
||||
|
||||
|
||||
Map<String,UninvertingReader.Type> uninversionMap = null;
|
||||
|
||||
for(FieldInfo fi: reader.getFieldInfos()) {
|
||||
final UninvertingReader.Type type = getUninversionType(fi);
|
||||
if (type != null) {
|
||||
if (uninversionMap == null) {
|
||||
uninversionMap = new HashMap<>();
|
||||
}
|
||||
uninversionMap.put(fi.name, type);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(uninversionMap == null) {
|
||||
return reader; // Default to normal reader if nothing to uninvert
|
||||
} else {
|
||||
return new UninvertingFilterCodecReader(reader, uninversionMap);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Delegates to an Uninverting for fields with docvalues
|
||||
*
|
||||
* This is going to blow up FieldCache, look into an alternative implementation that uninverts without
|
||||
* fieldcache
|
||||
*/
|
||||
private class UninvertingFilterCodecReader extends FilterCodecReader {
|
||||
|
||||
private final UninvertingReader uninvertingReader;
|
||||
private final DocValuesProducer docValuesProducer;
|
||||
|
||||
public UninvertingFilterCodecReader(CodecReader in, Map<String,UninvertingReader.Type> uninversionMap) {
|
||||
super(in);
|
||||
|
||||
this.uninvertingReader = new UninvertingReader(in, uninversionMap);
|
||||
this.docValuesProducer = new DocValuesProducer() {
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
|
||||
return uninvertingReader.getNumericDocValues(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
|
||||
return uninvertingReader.getBinaryDocValues(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSorted(FieldInfo field) throws IOException {
|
||||
return uninvertingReader.getSortedDocValues(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
||||
return uninvertingReader.getSortedNumericDocValues(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
||||
return uninvertingReader.getSortedSetDocValues(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
if (!skipIntegrityCheck) {
|
||||
uninvertingReader.checkIntegrity();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doClose() throws IOException {
|
||||
docValuesProducer.close();
|
||||
uninvertingReader.close();
|
||||
super.doClose();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesProducer getDocValuesReader() {
|
||||
return docValuesProducer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
return uninvertingReader.getFieldInfos();
|
||||
}
|
||||
|
||||
@Override
|
||||
public CacheHelper getCoreCacheHelper() {
|
||||
return in.getCoreCacheHelper();
|
||||
}
|
||||
|
||||
@Override
|
||||
public CacheHelper getReaderCacheHelper() {
|
||||
return in.getReaderCacheHelper();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -62,6 +62,7 @@
|
|||
<field name="datedv" type="date" indexed="false" stored="false" docValues="true" default="1995-12-31T23:59:59.999Z"/>
|
||||
|
||||
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true" default="solr" />
|
||||
<field name="string_add_dv_later" type="string" indexed="true" stored="true" docValues="false"/>
|
||||
<field name="booldv" type="boolean" indexed="false" stored="false" docValues="true" default="true" />
|
||||
|
||||
<field name="floatdvs" type="float" indexed="false" stored="false" docValues="true" default="1"/>
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
<?xml version="1.0" ?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<config>
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
|
||||
<indexConfig>
|
||||
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
|
||||
<mergePolicyFactory class="org.apache.solr.index.UninvertDocValuesMergePolicyFactory">
|
||||
<str name="wrapped.prefix">inner</str>
|
||||
<str name="inner.class">org.apache.solr.index.DefaultMergePolicyFactory</str>
|
||||
<bool name="skipIntegrityCheck">${solr.tests.skipIntegrityCheck:false}</bool>
|
||||
</mergePolicyFactory>
|
||||
|
||||
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
|
||||
</indexConfig>
|
||||
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
||||
|
||||
</config>
|
|
@ -0,0 +1,243 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.index;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.util.TestHarness;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class UninvertDocValuesMergePolicyTest extends SolrTestCaseJ4 {
|
||||
|
||||
private static String SOLR_TESTS_SKIP_INTEGRITY_CHECK = "solr.tests.skipIntegrityCheck";
|
||||
private static String ID_FIELD = "id";
|
||||
private static String TEST_FIELD = "string_add_dv_later";
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() throws Exception {
|
||||
System.setProperty(SOLR_TESTS_SKIP_INTEGRITY_CHECK, (random().nextBoolean() ? "true" : "false"));
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterTests() {
|
||||
System.clearProperty(SOLR_TESTS_SKIP_INTEGRITY_CHECK);
|
||||
}
|
||||
|
||||
@After
|
||||
public void after() throws Exception {
|
||||
deleteCore();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void before() throws Exception {
|
||||
initCore("solrconfig-uninvertdocvaluesmergepolicyfactory.xml", "schema-docValues.xml");
|
||||
}
|
||||
|
||||
public void testIndexAndAddDocValues() throws Exception {
|
||||
Random rand = random();
|
||||
|
||||
for(int i=0; i < 100; i++) {
|
||||
assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
|
||||
|
||||
if(rand.nextBoolean()) {
|
||||
assertU(commit());
|
||||
}
|
||||
}
|
||||
|
||||
assertU(commit());
|
||||
|
||||
// Assert everything has been indexed and there are no docvalues
|
||||
withNewRawReader(h, topReader -> {
|
||||
assertEquals(100, topReader.numDocs());
|
||||
|
||||
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
|
||||
|
||||
// The global field type should not have docValues yet
|
||||
assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
|
||||
});
|
||||
|
||||
|
||||
addDocValuesTo(h, TEST_FIELD);
|
||||
|
||||
|
||||
// Add some more documents with doc values turned on including updating some
|
||||
for(int i=90; i < 110; i++) {
|
||||
assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
|
||||
|
||||
if(rand.nextBoolean()) {
|
||||
assertU(commit());
|
||||
}
|
||||
}
|
||||
|
||||
assertU(commit());
|
||||
|
||||
withNewRawReader(h, topReader -> {
|
||||
assertEquals(110, topReader.numDocs());
|
||||
|
||||
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
|
||||
// The global field type should have docValues because a document with dvs was added
|
||||
assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
|
||||
});
|
||||
|
||||
int optimizeSegments = 1;
|
||||
assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
|
||||
|
||||
|
||||
// Assert all docs have the right docvalues
|
||||
withNewRawReader(h, topReader -> {
|
||||
// Assert merged into one segment
|
||||
assertEquals(110, topReader.numDocs());
|
||||
assertEquals(optimizeSegments, topReader.leaves().size());
|
||||
|
||||
|
||||
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
|
||||
// The global field type should have docValues because a document with dvs was added
|
||||
assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
|
||||
|
||||
|
||||
// Check that all segments have the right docvalues type with the correct value
|
||||
// Also check that other fields (e.g. the id field) didn't mistakenly get docvalues added
|
||||
for (LeafReaderContext ctx : topReader.leaves()) {
|
||||
LeafReader r = ctx.reader();
|
||||
SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
|
||||
for(int i = 0; i < r.numDocs(); ++i) {
|
||||
Document doc = r.document(i);
|
||||
String v = doc.getField(TEST_FIELD).stringValue();
|
||||
String id = doc.getField(ID_FIELD).stringValue();
|
||||
assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
|
||||
assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
|
||||
assertEquals(v, id);
|
||||
|
||||
docvalues.nextDoc();
|
||||
assertEquals(v, docvalues.binaryValue().utf8ToString());
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// When an non-indexed field gets merged, it exhibit the old behavior
|
||||
// The field will be merged, docvalues headers updated, but no docvalues for this field
|
||||
public void testNonIndexedFieldDoesNonFail() throws Exception {
|
||||
// Remove Indexed from fieldType
|
||||
removeIndexFrom(h, TEST_FIELD);
|
||||
|
||||
assertU(adoc(ID_FIELD, String.valueOf(1), TEST_FIELD, String.valueOf(1)));
|
||||
assertU(commit());
|
||||
|
||||
addDocValuesTo(h, TEST_FIELD);
|
||||
|
||||
assertU(adoc(ID_FIELD, String.valueOf(2), TEST_FIELD, String.valueOf(2)));
|
||||
assertU(commit());
|
||||
|
||||
assertU(optimize("maxSegments", "1"));
|
||||
|
||||
withNewRawReader(h, topReader -> {
|
||||
// Assert merged into one segment
|
||||
assertEquals(2, topReader.numDocs());
|
||||
assertEquals(1, topReader.leaves().size());
|
||||
|
||||
|
||||
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
|
||||
// The global field type should have docValues because a document with dvs was added
|
||||
assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
|
||||
|
||||
for (LeafReaderContext ctx : topReader.leaves()) {
|
||||
LeafReader r = ctx.reader();
|
||||
SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
|
||||
for(int i = 0; i < r.numDocs(); ++i) {
|
||||
Document doc = r.document(i);
|
||||
String v = doc.getField(TEST_FIELD).stringValue();
|
||||
String id = doc.getField(ID_FIELD).stringValue();
|
||||
assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
|
||||
assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
|
||||
|
||||
|
||||
if(id.equals("2")) {
|
||||
assertTrue(docvalues.advanceExact(i));
|
||||
assertEquals(v, docvalues.binaryValue().utf8ToString());
|
||||
} else {
|
||||
assertFalse(docvalues.advanceExact(i));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private static void addDocValuesTo(TestHarness h, String fieldName) {
|
||||
implUpdateSchemaField(h, fieldName, (p) -> (p | 0x00008000)); // FieldProperties.DOC_VALUES
|
||||
}
|
||||
|
||||
private static void removeIndexFrom(TestHarness h, String fieldName) {
|
||||
implUpdateSchemaField(h, fieldName, (p) -> (p ^ 0x00000001)); // FieldProperties.INDEXED
|
||||
}
|
||||
|
||||
private static void implUpdateSchemaField(TestHarness h, String fieldName, IntUnaryOperator propertiesModifier) {
|
||||
try (SolrCore core = h.getCoreInc()) {
|
||||
|
||||
// Add docvalues to the field type
|
||||
IndexSchema schema = core.getLatestSchema();
|
||||
SchemaField oldSchemaField = schema.getField(fieldName);
|
||||
SchemaField newSchemaField = new SchemaField(
|
||||
fieldName,
|
||||
oldSchemaField.getType(),
|
||||
propertiesModifier.applyAsInt(oldSchemaField.getProperties()),
|
||||
oldSchemaField.getDefaultValue());
|
||||
schema.getFields().put(fieldName, newSchemaField);
|
||||
}
|
||||
}
|
||||
|
||||
private interface DirectoryReaderConsumer {
|
||||
public void accept(DirectoryReader consumer) throws Exception;
|
||||
}
|
||||
|
||||
private static void withNewRawReader(TestHarness h, DirectoryReaderConsumer consumer) {
|
||||
try (SolrCore core = h.getCoreInc()) {
|
||||
final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
|
||||
final SolrIndexSearcher searcher = searcherRef.get();
|
||||
try {
|
||||
try {
|
||||
consumer.accept(searcher.getRawReader());
|
||||
} catch (Exception e) {
|
||||
fail(e.toString());
|
||||
}
|
||||
} finally {
|
||||
searcherRef.decref();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue