SOLR-7091: Nested documents with unknown fields don't work in schemaless mode.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685660 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2015-06-15 20:14:09 +00:00
parent 9741c16721
commit 5523048058
6 changed files with 146 additions and 17 deletions

View File

@ -148,6 +148,9 @@ Bug Fixes
* SOLR-7635: Limit lsof port check in bin/solr to just listening ports
(Upayavira, Ramkumar Aiyengar)
* SOLR-7091: Nested documents with unknown fields don't work in schemaless mode.
(Steve Rowe)
Optimizations
----------------------

View File

@ -39,8 +39,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
@ -277,11 +279,12 @@ public class AddSchemaFieldsUpdateProcessorFactory extends UpdateRequestProcesso
// build a selector each time through the loop b/c the schema we are
// processing may have changed
FieldNameSelector selector = buildSelector(oldSchema);
for (final String fieldName : doc.getFieldNames()) {
if (selector.shouldMutate(fieldName)) { // returns false if the field already exists in the current schema
String fieldTypeName = mapValueClassesToFieldType(doc.getField(fieldName));
newFields.add(oldSchema.newField(fieldName, fieldTypeName, Collections.<String,Object>emptyMap()));
}
Map<String,List<SolrInputField>> unknownFields = new HashMap<>();
getUnknownFields(selector, doc, unknownFields);
for (final Map.Entry<String,List<SolrInputField>> entry : unknownFields.entrySet()) {
String fieldName = entry.getKey();
String fieldTypeName = mapValueClassesToFieldType(entry.getValue());
newFields.add(oldSchema.newField(fieldName, fieldTypeName, Collections.<String,Object>emptyMap()));
}
if (newFields.isEmpty()) {
// nothing to do - no fields will be added - exit from the retry loop
@ -328,21 +331,52 @@ public class AddSchemaFieldsUpdateProcessorFactory extends UpdateRequestProcesso
super.processAdd(cmd);
}
private String mapValueClassesToFieldType(SolrInputField field) {
NEXT_TYPE_MAPPING: for (TypeMapping typeMapping : typeMappings) {
NEXT_FIELD_VALUE: for (Object fieldValue : field.getValues()) {
for (Class<?> valueClass : typeMapping.valueClasses) {
if (valueClass.isInstance(fieldValue)) {
continue NEXT_FIELD_VALUE;
}
/**
* Recursively find unknown fields in the given doc and its child documents, if any.
*/
private void getUnknownFields
(FieldNameSelector selector, SolrInputDocument doc, Map<String,List<SolrInputField>> unknownFields) {
for (final String fieldName : doc.getFieldNames()) {
if (selector.shouldMutate(fieldName)) { // returns false if the field already exists in the current schema
List<SolrInputField> solrInputFields = unknownFields.get(fieldName);
if (null == solrInputFields) {
solrInputFields = new ArrayList<>();
unknownFields.put(fieldName, solrInputFields);
}
// This fieldValue is not an instance of any of this fieldType's valueClass-s
continue NEXT_TYPE_MAPPING;
solrInputFields.add(doc.getField(fieldName));
}
// Success! Each of this field's values is an instance of one of this fieldType's valueClass-s
}
List<SolrInputDocument> childDocs = doc.getChildDocuments();
if (null != childDocs) {
for (SolrInputDocument childDoc : childDocs) {
getUnknownFields(selector, childDoc, unknownFields);
}
}
}
/**
* Maps all given field values' classes to a field type using the configured type mapping rules.
*
* @param fields one or more (same-named) field values from one or more documents
*/
private String mapValueClassesToFieldType(List<SolrInputField> fields) {
NEXT_TYPE_MAPPING: for (TypeMapping typeMapping : typeMappings) {
for (SolrInputField field : fields) {
NEXT_FIELD_VALUE: for (Object fieldValue : field.getValues()) {
for (Class<?> valueClass : typeMapping.valueClasses) {
if (valueClass.isInstance(fieldValue)) {
continue NEXT_FIELD_VALUE;
}
}
// This fieldValue is not an instance of any of the mapped valueClass-s,
// so mapping fails - go try the next type mapping.
continue NEXT_TYPE_MAPPING;
}
}
// Success! Each of this field's values is an instance of a mapped valueClass
return typeMapping.fieldTypeName;
}
// At least one of this field's values is not an instance of any configured fieldType's valueClass-s
// At least one of this field's values is not an instance of any of the mapped valueClass-s
return defaultFieldType;
}

View File

@ -36,6 +36,7 @@
<fields>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>

View File

@ -46,6 +46,16 @@
</lst>
</requestHandler>
<query>
<!-- custom cache currently used by block join -->
<cache name="perSegFilter"
class="solr.search.LRUCache"
size="10"
initialSize="0"
autowarmCount="10"
regenerator="solr.NoOpRegenerator" />
</query>
<!-- Add unknown fields to the schema
An example field type guessing update processor that will
@ -89,7 +99,7 @@
</processor>
<processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
<str name="defaultFieldType">text_general</str>
<str name="defaultFieldType">text</str>
<lst name="typeMapping">
<str name="valueClass">java.lang.Boolean</str>
<str name="fieldType">boolean</str>

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.solr.update;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Before;
import org.junit.Test;
public class DataDrivenBlockJoinTest extends SolrTestCaseJ4 {
private File tmpSolrHome;
private File tmpConfDir;
private static final String collection = "collection1";
private static final String confDir = collection + "/conf";
@Before
public void before() throws Exception {
tmpSolrHome = createTempDir().toFile();
tmpConfDir = new File(tmpSolrHome, confDir);
File testHomeConfDir = new File(TEST_HOME(), confDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-schemaless.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-add-schema-fields-update-processor.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig.snippet.randomindexconfig.xml"), tmpConfDir);
System.setProperty("managed.schema.mutable", "true");
System.setProperty("enable.update.log", "false");
initCore("solrconfig-schemaless.xml", "schema-add-schema-fields-update-processor.xml", tmpSolrHome.getPath());
}
@Test
public void testAddNestedDocuments() throws Exception {
assertU("<add>"
+ " <doc>"
+ " <field name='id'>1</field>"
+ " <field name='parent'>X</field>"
+ " <field name='hierarchical_numbering'>8</field>"
+ " <doc>"
+ " <field name='id'>2</field>"
+ " <field name='child'>y</field>"
+ " <field name='hierarchical_numbering'>8.138</field>"
+ " <doc>"
+ " <field name='id'>3</field>"
+ " <field name='grandchild'>z</field>"
+ " <field name='hierarchical_numbering'>8.138.4498</field>"
+ " </doc>"
+ " </doc>"
+ " </doc>"
+ "</add>");
assertU("<commit/>");
}
}

View File

@ -503,6 +503,14 @@
initialSize="512"
autowarmCount="0"/>
<!-- custom cache currently used by block join -->
<cache name="perSegFilter"
class="solr.search.LRUCache"
size="10"
initialSize="0"
autowarmCount="10"
regenerator="solr.NoOpRegenerator" />
<!-- Field Value Cache
Cache used to hold field values that are quickly accessible