SOLR-4893: Extend FieldMutatingUpdateProcessor.ConfigurableFieldNameSelector to enable checking whether a field matches any schema field.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1491102 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2013-06-08 23:52:47 +00:00
parent b9dc8ef8f6
commit b0aef6e46d
6 changed files with 210 additions and 75 deletions

View File

@ -88,6 +88,12 @@ New Features
* SOLR-4228: SolrJ's SolrPing object has new methods for ping, enable, and
disable. (Shawn Heisey, hossman, Steve Rowe)
* SOLR-4893: Extend FieldMutatingUpdateProcessor.ConfigurableFieldNameSelector
to enable checking whether a field matches any schema field. To select field
names that don't match any fields or dynamic fields in the schema, add
<bool name="fieldNameMatchesSchemaField">false</bool> to an update
processor's configuration in solrconfig.xml. (Steve Rowe, hossman)
Bug Fixes
----------------------

View File

@ -195,25 +195,13 @@ public class CloneFieldUpdateProcessorFactory
srcSelector =
FieldMutatingUpdateProcessor.createFieldNameSelector
(core.getResourceLoader(),
core,
srcInclusions.fieldName,
srcInclusions.typeName,
srcInclusions.typeClass,
srcInclusions.fieldRegex,
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
(core.getResourceLoader(), core, srcInclusions, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
for (SelectorParams exc : srcExclusions) {
srcSelector = FieldMutatingUpdateProcessor.wrap
(srcSelector,
FieldMutatingUpdateProcessor.createFieldNameSelector
(core.getResourceLoader(),
core,
exc.fieldName,
exc.typeName,
exc.typeClass,
exc.fieldRegex,
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
(core.getResourceLoader(), core, exc, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
}
}

View File

@ -20,11 +20,11 @@ package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Set;
import java.util.regex.Pattern;
import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
import static org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
@ -192,79 +192,75 @@ public abstract class FieldMutatingUpdateProcessor
public static FieldNameSelector createFieldNameSelector
(final SolrResourceLoader loader,
final SolrCore core,
final Set<String> fields,
final Set<String> typeNames,
final Collection<String> typeClasses,
final Collection<Pattern> regexes,
final SelectorParams params,
final FieldNameSelector defSelector) {
final Collection<Class> classes
= new ArrayList<Class>(typeClasses.size());
for (String t : typeClasses) {
try {
classes.add(loader.findClass(t, Object.class));
} catch (Exception e) {
throw new SolrException(SERVER_ERROR,
"Can't resolve typeClass: " + t, e);
}
}
if (classes.isEmpty() &&
typeNames.isEmpty() &&
regexes.isEmpty() &&
fields.isEmpty()) {
if (params.noSelectorsSpecified()) {
return defSelector;
}
return new ConfigurableFieldNameSelector(core, fields, typeNames, classes, regexes);
return new ConfigurableFieldNameSelector(loader, core, params);
}
private static final class ConfigurableFieldNameSelector
implements FieldNameSelector {
final SolrCore core;
final Set<String> fields;
final Set<String> typeNames;
final SelectorParams params;
final Collection<Class> classes;
final Collection<Pattern> regexes;
private ConfigurableFieldNameSelector(final SolrCore core,
final Set<String> fields,
final Set<String> typeNames,
final Collection<Class> classes,
final Collection<Pattern> regexes) {
private ConfigurableFieldNameSelector(final SolrResourceLoader loader,
final SolrCore core,
final SelectorParams params) {
this.core = core;
this.fields = fields;
this.typeNames = typeNames;
this.params = params;
final Collection<Class> classes = new ArrayList<Class>(params.typeClass.size());
for (String t : params.typeClass) {
try {
classes.add(loader.findClass(t, Object.class));
} catch (Exception e) {
throw new SolrException(SERVER_ERROR, "Can't resolve typeClass: " + t, e);
}
}
this.classes = classes;
this.regexes = regexes;
}
@Override
public boolean shouldMutate(final String fieldName) {
// order of checks is bsaed on what should be quicker
// order of checks is based on what should be quicker
// (ie: set lookups faster the looping over instanceOf / matches tests
if ( ! (fields.isEmpty() || fields.contains(fieldName)) ) {
if ( ! (params.fieldName.isEmpty() || params.fieldName.contains(fieldName)) ) {
return false;
}
// do not consider it an error if the fieldName has no type
// there might be another processor dealing with it later
FieldType t = core.getLatestSchema().getFieldTypeNoEx(fieldName);
if (null != t) {
if (! (typeNames.isEmpty() || typeNames.contains(t.getTypeName())) ) {
final boolean fieldExists = (null != t);
if ( (null != params.fieldNameMatchesSchemaField) &&
(fieldExists != params.fieldNameMatchesSchemaField) ) {
return false;
}
if (fieldExists) {
if (! (params.typeName.isEmpty() || params.typeName.contains(t.getTypeName())) ) {
return false;
}
if (! (classes.isEmpty() || instanceOfAny(t, classes)) ) {
return false;
}
}
}
}
if (! (regexes.isEmpty() || matchesAny(fieldName, regexes)) ) {
if (! (params.fieldRegex.isEmpty() || matchesAny(fieldName, params.fieldRegex)) ) {
return false;
}

View File

@ -106,6 +106,13 @@ public abstract class FieldMutatingUpdateProcessorFactory
public Set<String> typeName = Collections.emptySet();
public Collection<String> typeClass = Collections.emptyList();
public Collection<Pattern> fieldRegex = Collections.emptyList();
public Boolean fieldNameMatchesSchemaField = null; // null => not specified
public boolean noSelectorsSpecified() {
return typeClass.isEmpty() && typeName.isEmpty()
&& fieldRegex.isEmpty() && fieldName.isEmpty()
&& null == fieldNameMatchesSchemaField;
}
}
private SelectorParams inclusions = new SelectorParams();
@ -121,7 +128,6 @@ public abstract class FieldMutatingUpdateProcessorFactory
" inform(SolrCore) never called???");
}
@SuppressWarnings("unchecked")
public static SelectorParams parseSelectorParams(NamedList args) {
SelectorParams params = new SelectorParams();
@ -145,13 +151,16 @@ public abstract class FieldMutatingUpdateProcessorFactory
// resolve this into actual Class objects later
params.typeClass = oneOrMany(args, "typeClass");
// getBooleanArg() returns null if the arg is not specified
params.fieldNameMatchesSchemaField = getBooleanArg(args, "fieldNameMatchesSchemaField");
return params;
}
/**
* Handles common initialization related to source fields for
* constructoring the FieldNameSelector to be used.
* constructing the FieldNameSelector to be used.
*
* Will error if any unexpected init args are found, so subclasses should
* remove any subclass-specific init args before calling this method.
@ -195,25 +204,13 @@ public abstract class FieldMutatingUpdateProcessorFactory
selector =
FieldMutatingUpdateProcessor.createFieldNameSelector
(core.getResourceLoader(),
core,
inclusions.fieldName,
inclusions.typeName,
inclusions.typeClass,
inclusions.fieldRegex,
getDefaultSelector(core));
(core.getResourceLoader(), core, inclusions, getDefaultSelector(core));
for (SelectorParams exc : exclusions) {
selector = FieldMutatingUpdateProcessor.wrap
(selector,
FieldMutatingUpdateProcessor.createFieldNameSelector
(core.getResourceLoader(),
core,
exc.fieldName,
exc.typeName,
exc.typeClass,
exc.fieldRegex,
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
(core.getResourceLoader(), core, exc, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
}
}
@ -270,7 +267,28 @@ public abstract class FieldMutatingUpdateProcessorFactory
return result;
}
/**
* Removes the first instance of the key from NamedList, returning the Boolean
* that key referred to, or null if the key is not specified.
* @exception SolrException invalid type or structure
*/
public static Boolean getBooleanArg(final NamedList args, final String key) {
Boolean bool;
List values = args.getAll(key);
if (0 == values.size()) {
return null;
}
if (values.size() > 1) {
throw new SolrException(SERVER_ERROR, "Only one '" + key + "' is allowed");
}
Object o = args.remove(key);
if (o instanceof Boolean) {
bool = (Boolean)o;
} else if (o instanceof CharSequence) {
bool = Boolean.parseBoolean(o.toString());
} else {
throw new SolrException(SERVER_ERROR, "'" + key + "' must have type 'bool' or 'str'; found " + o.getClass());
}
return bool;
}
}

View File

@ -241,6 +241,46 @@
<processor class="solr.IgnoreFieldUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-not-in-schema-explicit-selector">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<bool name="fieldNameMatchesSchemaField">false</bool>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-in-schema">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<bool name="fieldNameMatchesSchemaField">true</bool>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-not-in-schema-and-foo-name-prefix">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<str name="fieldRegex">foo.*</str>
<bool name="fieldNameMatchesSchemaField">false</bool>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-foo-name-prefix-except-not-schema">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<str name="fieldRegex">foo.*</str>
<lst name="exclude">
<bool name="fieldNameMatchesSchemaField">false</bool>
</lst>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-not-in-schema-explicit-str-selector">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<str name="fieldNameMatchesSchemaField">false</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-in-schema-str-selector">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<str name="fieldNameMatchesSchemaField">true</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="ignore-some">
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
<str name="fieldRegex">.*_raw</str>

View File

@ -552,6 +552,8 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
IndexSchema schema = h.getCore().getLatestSchema();
assertNull("test expects 'foo_giberish' to not be a valid field, looks like schema was changed out from under us",
schema.getFieldTypeNoEx("foo_giberish"));
assertNull("test expects 'bar_giberish' to not be a valid field, looks like schema was changed out from under us",
schema.getFieldTypeNoEx("bar_giberish"));
assertNotNull("test expects 't_raw' to be a valid field, looks like schema was changed out from under us",
schema.getFieldTypeNoEx("t_raw"));
assertNotNull("test expects 'foo_s' to be a valid field, looks like schema was changed out from under us",
@ -561,11 +563,13 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
d = processAdd("ignore-not-in-schema",
doc(f("id", "1111"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertFalse(d.containsKey("bar_giberish"));
assertFalse(d.containsKey("foo_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("t_raw"));
@ -574,15 +578,98 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
d = processAdd("ignore-some",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("foo_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("bar_giberish"));
assertFalse(d.containsKey("t_raw"));
assertEquals("hoss", d.getFieldValue("foo_s"));
d = processAdd("ignore-not-in-schema-explicit-selector",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertFalse(d.containsKey("foo_giberish"));
assertFalse(d.containsKey("bar_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("t_raw"));
assertEquals("hoss", d.getFieldValue("foo_s"));
d = processAdd("ignore-not-in-schema-and-foo-name-prefix",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertFalse(d.containsKey("foo_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("bar_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("t_raw"));
assertEquals("hoss", d.getFieldValue("foo_s"));
d = processAdd("ignore-foo-name-prefix-except-not-schema",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("foo_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("bar_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("t_raw"));
assertFalse(d.containsKey("foo_s"));
d = processAdd("ignore-in-schema",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertTrue(d.containsKey("foo_giberish"));
assertTrue(d.containsKey("bar_giberish"));
assertFalse(d.containsKey("id"));
assertFalse(d.containsKey("t_raw"));
assertFalse(d.containsKey("foo_s"));
d = processAdd("ignore-not-in-schema-explicit-str-selector",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertFalse(d.containsKey("foo_giberish"));
assertFalse(d.containsKey("bar_giberish"));
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
d.getFieldValues("t_raw"));
assertEquals("hoss", d.getFieldValue("foo_s"));
d = processAdd("ignore-in-schema-str-selector",
doc(f("id", "1111"),
f("foo_giberish", "123456789", "", 42, "abcd"),
f("bar_giberish", "123456789", "", 42, "abcd"),
f("t_raw", "123456789", "", 42, "abcd"),
f("foo_s", "hoss")));
assertNotNull(d);
assertTrue(d.containsKey("foo_giberish"));
assertTrue(d.containsKey("bar_giberish"));
assertFalse(d.containsKey("id"));
assertFalse(d.containsKey("t_raw"));
assertFalse(d.containsKey("foo_s"));
}