SOLR-10046: Add UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)

Christine Poerschke 2017-03-15 10:31:10 +00:00
parent 65c695b025
commit 9d56f13650
5 changed files with 502 additions and 0 deletions

solr/CHANGES.txt

@@ -185,6 +185,8 @@ New Features
* SOLR-10224: Add disk total and disk free metrics. (ab)
* SOLR-10046: Add UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)
Bug Fixes
----------------------

org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java

@@ -0,0 +1,218 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OneMergeWrappingMergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uninverting.UninvertingReader;
/**
* A merge policy that detects schema changes and, using an UninvertingReader, writes docvalues
* into merging segments for fields that have docvalues enabled in the schema.
*
* Selection of the segments to merge is delegated to the wrapped merge policy.
*/
public class UninvertDocValuesMergePolicyFactory extends WrapperMergePolicyFactory {
final private boolean skipIntegrityCheck;
/**
* Whether the wrapped docValues producer should skip its integrity (consistency) check.
*/
public boolean getSkipIntegrityCheck() {
return skipIntegrityCheck;
}
public UninvertDocValuesMergePolicyFactory(SolrResourceLoader resourceLoader, MergePolicyFactoryArgs args, IndexSchema schema) {
super(resourceLoader, args, schema);
final Boolean sic = (Boolean)args.remove("skipIntegrityCheck");
if (sic != null) {
this.skipIntegrityCheck = sic.booleanValue();
} else {
this.skipIntegrityCheck = false;
}
if (!args.keys().isEmpty()) {
throw new IllegalArgumentException("Arguments were "+args+" but "+getClass().getSimpleName()+" accepts no other arguments.");
}
}
@Override
protected MergePolicy getMergePolicyInstance(MergePolicy wrappedMP) {
return new OneMergeWrappingMergePolicy(wrappedMP, (merge) -> new UninvertDocValuesOneMerge(merge.segments));
}
private UninvertingReader.Type getUninversionType(FieldInfo fi) {
SchemaField sf = schema.getFieldOrNull(fi.name);
if (null != sf &&
sf.hasDocValues() &&
fi.getDocValuesType() == DocValuesType.NONE &&
fi.getIndexOptions() != IndexOptions.NONE) {
return sf.getType().getUninversionType(sf);
} else {
return null;
}
}
private class UninvertDocValuesOneMerge extends MergePolicy.OneMerge {
public UninvertDocValuesOneMerge(List<SegmentCommitInfo> segments) {
super(segments);
}
@Override
public CodecReader wrapForMerge(CodecReader reader) throws IOException {
// Wrap the reader with an uninverting reader if any field has no docvalues but the
// schema says it should have them
Map<String,UninvertingReader.Type> uninversionMap = null;
for(FieldInfo fi: reader.getFieldInfos()) {
final UninvertingReader.Type type = getUninversionType(fi);
if (type != null) {
if (uninversionMap == null) {
uninversionMap = new HashMap<>();
}
uninversionMap.put(fi.name, type);
}
}
if(uninversionMap == null) {
return reader; // Default to normal reader if nothing to uninvert
} else {
return new UninvertingFilterCodecReader(reader, uninversionMap);
}
}
}
/**
* Delegates to an UninvertingReader for fields that need their docvalues uninverted.
*
* TODO: This is going to blow up the FieldCache; look into an alternative implementation that
* uninverts without the FieldCache.
*/
private class UninvertingFilterCodecReader extends FilterCodecReader {
private final UninvertingReader uninvertingReader;
private final DocValuesProducer docValuesProducer;
public UninvertingFilterCodecReader(CodecReader in, Map<String,UninvertingReader.Type> uninversionMap) {
super(in);
this.uninvertingReader = new UninvertingReader(in, uninversionMap);
this.docValuesProducer = new DocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
return uninvertingReader.getNumericDocValues(field.name);
}
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
return uninvertingReader.getBinaryDocValues(field.name);
}
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
return uninvertingReader.getSortedDocValues(field.name);
}
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return uninvertingReader.getSortedNumericDocValues(field.name);
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
return uninvertingReader.getSortedSetDocValues(field.name);
}
@Override
public void checkIntegrity() throws IOException {
if (!skipIntegrityCheck) {
uninvertingReader.checkIntegrity();
}
}
@Override
public void close() throws IOException {
}
@Override
public long ramBytesUsed() {
return 0;
}
};
}
@Override
protected void doClose() throws IOException {
docValuesProducer.close();
uninvertingReader.close();
super.doClose();
}
@Override
public DocValuesProducer getDocValuesReader() {
return docValuesProducer;
}
@Override
public FieldInfos getFieldInfos() {
return uninvertingReader.getFieldInfos();
}
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}
}
}
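For orientation, here is a minimal sketch of how this factory might be enabled in a solrconfig.xml indexConfig block. It simply mirrors the test configuration added later in this commit; "inner" is just the namespace chosen via wrapped.prefix, and the wrapped class is whichever merge policy factory should keep selecting the segments to merge.

<indexConfig>
  <!-- During merges, uninvert and write docvalues for fields that have docValues
       enabled in the schema but lack them in already-written segments. -->
  <mergePolicyFactory class="org.apache.solr.index.UninvertDocValuesMergePolicyFactory">
    <!-- Namespace under which the wrapped (delegate) factory is configured. -->
    <str name="wrapped.prefix">inner</str>
    <!-- The wrapped policy still decides which segments get merged. -->
    <str name="inner.class">org.apache.solr.index.DefaultMergePolicyFactory</str>
    <!-- Optional; when true, the delegate docvalues producer's integrity check is skipped. -->
    <bool name="skipIntegrityCheck">false</bool>
  </mergePolicyFactory>
</indexConfig>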

schema-docValues.xml

@@ -62,6 +62,7 @@
<field name="datedv" type="date" indexed="false" stored="false" docValues="true" default="1995-12-31T23:59:59.999Z"/> <field name="datedv" type="date" indexed="false" stored="false" docValues="true" default="1995-12-31T23:59:59.999Z"/>
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true" default="solr" /> <field name="stringdv" type="string" indexed="false" stored="false" docValues="true" default="solr" />
<field name="string_add_dv_later" type="string" indexed="true" stored="true" docValues="false"/>
<field name="booldv" type="boolean" indexed="false" stored="false" docValues="true" default="true" /> <field name="booldv" type="boolean" indexed="false" stored="false" docValues="true" default="true" />
<field name="floatdvs" type="float" indexed="false" stored="false" docValues="true" default="1"/> <field name="floatdvs" type="float" indexed="false" stored="false" docValues="true" default="1"/>

solrconfig-uninvertdocvaluesmergepolicyfactory.xml

@@ -0,0 +1,38 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<indexConfig>
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
<mergePolicyFactory class="org.apache.solr.index.UninvertDocValuesMergePolicyFactory">
<str name="wrapped.prefix">inner</str>
<str name="inner.class">org.apache.solr.index.DefaultMergePolicyFactory</str>
<bool name="skipIntegrityCheck">${solr.tests.skipIntegrityCheck:false}</bool>
</mergePolicyFactory>
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
</indexConfig>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
</config>
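The wrapped.prefix / inner.class pair above follows the WrapperMergePolicyFactory convention: wrapped.prefix names a prefix, and the arguments under that prefix configure the wrapped factory. As a hedged illustration only (the "delegate" prefix, TieredMergePolicyFactory, and its pass-through arguments are assumptions, not part of this commit), the same uninverting behavior could wrap a tiered policy like so:

<mergePolicyFactory class="org.apache.solr.index.UninvertDocValuesMergePolicyFactory">
  <str name="wrapped.prefix">delegate</str>
  <str name="delegate.class">org.apache.solr.index.TieredMergePolicyFactory</str>
  <!-- Arguments under the chosen prefix are forwarded to the wrapped factory. -->
  <int name="delegate.maxMergeAtOnce">10</int>
  <int name="delegate.segmentsPerTier">10</int>
</mergePolicyFactory>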

org/apache/solr/index/UninvertDocValuesMergePolicyTest.java

@@ -0,0 +1,243 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.util.Random;
import java.util.function.IntUnaryOperator;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SortedDocValues;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestHarness;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
public class UninvertDocValuesMergePolicyTest extends SolrTestCaseJ4 {
private static String SOLR_TESTS_SKIP_INTEGRITY_CHECK = "solr.tests.skipIntegrityCheck";
private static String ID_FIELD = "id";
private static String TEST_FIELD = "string_add_dv_later";
@BeforeClass
public static void beforeTests() throws Exception {
System.setProperty(SOLR_TESTS_SKIP_INTEGRITY_CHECK, (random().nextBoolean() ? "true" : "false"));
}
@AfterClass
public static void afterTests() {
System.clearProperty(SOLR_TESTS_SKIP_INTEGRITY_CHECK);
}
@After
public void after() throws Exception {
deleteCore();
}
@Before
public void before() throws Exception {
initCore("solrconfig-uninvertdocvaluesmergepolicyfactory.xml", "schema-docValues.xml");
}
public void testIndexAndAddDocValues() throws Exception {
Random rand = random();
for(int i=0; i < 100; i++) {
assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
if(rand.nextBoolean()) {
assertU(commit());
}
}
assertU(commit());
// Assert everything has been indexed and there are no docvalues
withNewRawReader(h, topReader -> {
assertEquals(100, topReader.numDocs());
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
// The global field type should not have docValues yet
assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
});
addDocValuesTo(h, TEST_FIELD);
// Add some more documents with doc values turned on, including updates to some existing documents
for(int i=90; i < 110; i++) {
assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
if(rand.nextBoolean()) {
assertU(commit());
}
}
assertU(commit());
withNewRawReader(h, topReader -> {
assertEquals(110, topReader.numDocs());
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
// The global field type should have docValues because a document with dvs was added
assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
});
int optimizeSegments = 1;
assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
// Assert all docs have the right docvalues
withNewRawReader(h, topReader -> {
// Assert merged into one segment
assertEquals(110, topReader.numDocs());
assertEquals(optimizeSegments, topReader.leaves().size());
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
// The global field type should have docValues because a document with dvs was added
assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
// Check that all segments have the right docvalues type with the correct value
// Also check that other fields (e.g. the id field) didn't mistakenly get docvalues added
for (LeafReaderContext ctx : topReader.leaves()) {
LeafReader r = ctx.reader();
SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
for(int i = 0; i < r.numDocs(); ++i) {
Document doc = r.document(i);
String v = doc.getField(TEST_FIELD).stringValue();
String id = doc.getField(ID_FIELD).stringValue();
assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
assertEquals(v, id);
docvalues.nextDoc();
assertEquals(v, docvalues.binaryValue().utf8ToString());
}
}
});
}
// When a non-indexed field gets merged, it exhibits the old behavior:
// the field is merged and its docvalues headers are updated, but no docvalues are written for it
public void testNonIndexedFieldDoesNonFail() throws Exception {
// Remove Indexed from fieldType
removeIndexFrom(h, TEST_FIELD);
assertU(adoc(ID_FIELD, String.valueOf(1), TEST_FIELD, String.valueOf(1)));
assertU(commit());
addDocValuesTo(h, TEST_FIELD);
assertU(adoc(ID_FIELD, String.valueOf(2), TEST_FIELD, String.valueOf(2)));
assertU(commit());
assertU(optimize("maxSegments", "1"));
withNewRawReader(h, topReader -> {
// Assert merged into one segment
assertEquals(2, topReader.numDocs());
assertEquals(1, topReader.leaves().size());
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
// The global field type should have docValues because a document with dvs was added
assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
for (LeafReaderContext ctx : topReader.leaves()) {
LeafReader r = ctx.reader();
SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
for(int i = 0; i < r.numDocs(); ++i) {
Document doc = r.document(i);
String v = doc.getField(TEST_FIELD).stringValue();
String id = doc.getField(ID_FIELD).stringValue();
assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
if(id.equals("2")) {
assertTrue(docvalues.advanceExact(i));
assertEquals(v, docvalues.binaryValue().utf8ToString());
} else {
assertFalse(docvalues.advanceExact(i));
}
}
}
});
}
private static void addDocValuesTo(TestHarness h, String fieldName) {
implUpdateSchemaField(h, fieldName, (p) -> (p | 0x00008000)); // FieldProperties.DOC_VALUES
}
private static void removeIndexFrom(TestHarness h, String fieldName) {
implUpdateSchemaField(h, fieldName, (p) -> (p ^ 0x00000001)); // FieldProperties.INDEXED
}
private static void implUpdateSchemaField(TestHarness h, String fieldName, IntUnaryOperator propertiesModifier) {
try (SolrCore core = h.getCoreInc()) {
// Replace the schema field with a copy whose properties have been modified
IndexSchema schema = core.getLatestSchema();
SchemaField oldSchemaField = schema.getField(fieldName);
SchemaField newSchemaField = new SchemaField(
fieldName,
oldSchemaField.getType(),
propertiesModifier.applyAsInt(oldSchemaField.getProperties()),
oldSchemaField.getDefaultValue());
schema.getFields().put(fieldName, newSchemaField);
}
}
private interface DirectoryReaderConsumer {
public void accept(DirectoryReader reader) throws Exception;
}
private static void withNewRawReader(TestHarness h, DirectoryReaderConsumer consumer) {
try (SolrCore core = h.getCoreInc()) {
final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
final SolrIndexSearcher searcher = searcherRef.get();
try {
try {
consumer.accept(searcher.getRawReader());
} catch (Exception e) {
fail(e.toString());
}
} finally {
searcherRef.decref();
}
}
}
}