mirror of https://github.com/apache/lucene.git
SOLR-4893: Extend FieldMutatingUpdateProcessor.ConfigurableFieldNameSelector to enable checking whether a field matches any schema field.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1491102 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b9dc8ef8f6
commit
b0aef6e46d
|
@ -88,6 +88,12 @@ New Features
|
|||
|
||||
* SOLR-4228: SolrJ's SolrPing object has new methods for ping, enable, and
|
||||
disable. (Shawn Heisey, hossman, Steve Rowe)
|
||||
|
||||
* SOLR-4893: Extend FieldMutatingUpdateProcessor.ConfigurableFieldNameSelector
|
||||
to enable checking whether a field matches any schema field. To select field
|
||||
names that don't match any fields or dynamic fields in the schema, add
|
||||
<bool name="fieldNameMatchesSchemaField">false</bool> to an update
|
||||
processor's configuration in solrconfig.xml. (Steve Rowe, hossman)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
|
|
@ -195,25 +195,13 @@ public class CloneFieldUpdateProcessorFactory
|
|||
|
||||
srcSelector =
|
||||
FieldMutatingUpdateProcessor.createFieldNameSelector
|
||||
(core.getResourceLoader(),
|
||||
core,
|
||||
srcInclusions.fieldName,
|
||||
srcInclusions.typeName,
|
||||
srcInclusions.typeClass,
|
||||
srcInclusions.fieldRegex,
|
||||
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
|
||||
(core.getResourceLoader(), core, srcInclusions, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
|
||||
|
||||
for (SelectorParams exc : srcExclusions) {
|
||||
srcSelector = FieldMutatingUpdateProcessor.wrap
|
||||
(srcSelector,
|
||||
FieldMutatingUpdateProcessor.createFieldNameSelector
|
||||
(core.getResourceLoader(),
|
||||
core,
|
||||
exc.fieldName,
|
||||
exc.typeName,
|
||||
exc.typeClass,
|
||||
exc.fieldRegex,
|
||||
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
|
||||
(core.getResourceLoader(), core, exc, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,11 +20,11 @@ package org.apache.solr.update.processor;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
|
||||
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
|
||||
import static org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
|
@ -192,79 +192,75 @@ public abstract class FieldMutatingUpdateProcessor
|
|||
public static FieldNameSelector createFieldNameSelector
|
||||
(final SolrResourceLoader loader,
|
||||
final SolrCore core,
|
||||
final Set<String> fields,
|
||||
final Set<String> typeNames,
|
||||
final Collection<String> typeClasses,
|
||||
final Collection<Pattern> regexes,
|
||||
final SelectorParams params,
|
||||
final FieldNameSelector defSelector) {
|
||||
|
||||
final Collection<Class> classes
|
||||
= new ArrayList<Class>(typeClasses.size());
|
||||
|
||||
for (String t : typeClasses) {
|
||||
try {
|
||||
classes.add(loader.findClass(t, Object.class));
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SERVER_ERROR,
|
||||
"Can't resolve typeClass: " + t, e);
|
||||
}
|
||||
}
|
||||
|
||||
if (classes.isEmpty() &&
|
||||
typeNames.isEmpty() &&
|
||||
regexes.isEmpty() &&
|
||||
fields.isEmpty()) {
|
||||
|
||||
if (params.noSelectorsSpecified()) {
|
||||
return defSelector;
|
||||
}
|
||||
|
||||
return new ConfigurableFieldNameSelector(core, fields, typeNames, classes, regexes);
|
||||
return new ConfigurableFieldNameSelector(loader, core, params);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static final class ConfigurableFieldNameSelector
|
||||
implements FieldNameSelector {
|
||||
|
||||
final SolrCore core;
|
||||
final Set<String> fields;
|
||||
final Set<String> typeNames;
|
||||
final SelectorParams params;
|
||||
final Collection<Class> classes;
|
||||
final Collection<Pattern> regexes;
|
||||
|
||||
private ConfigurableFieldNameSelector(final SolrCore core,
|
||||
final Set<String> fields,
|
||||
final Set<String> typeNames,
|
||||
final Collection<Class> classes,
|
||||
final Collection<Pattern> regexes) {
|
||||
private ConfigurableFieldNameSelector(final SolrResourceLoader loader,
|
||||
final SolrCore core,
|
||||
final SelectorParams params) {
|
||||
this.core = core;
|
||||
this.fields = fields;
|
||||
this.typeNames = typeNames;
|
||||
this.params = params;
|
||||
|
||||
final Collection<Class> classes = new ArrayList<Class>(params.typeClass.size());
|
||||
|
||||
for (String t : params.typeClass) {
|
||||
try {
|
||||
classes.add(loader.findClass(t, Object.class));
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SERVER_ERROR, "Can't resolve typeClass: " + t, e);
|
||||
}
|
||||
}
|
||||
this.classes = classes;
|
||||
this.regexes = regexes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
|
||||
// order of checks is bsaed on what should be quicker
|
||||
// order of checks is based on what should be quicker
|
||||
// (ie: set lookups faster the looping over instanceOf / matches tests
|
||||
|
||||
if ( ! (fields.isEmpty() || fields.contains(fieldName)) ) {
|
||||
if ( ! (params.fieldName.isEmpty() || params.fieldName.contains(fieldName)) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// do not consider it an error if the fieldName has no type
|
||||
// there might be another processor dealing with it later
|
||||
FieldType t = core.getLatestSchema().getFieldTypeNoEx(fieldName);
|
||||
if (null != t) {
|
||||
if (! (typeNames.isEmpty() || typeNames.contains(t.getTypeName())) ) {
|
||||
final boolean fieldExists = (null != t);
|
||||
|
||||
if ( (null != params.fieldNameMatchesSchemaField) &&
|
||||
(fieldExists != params.fieldNameMatchesSchemaField) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fieldExists) {
|
||||
|
||||
if (! (params.typeName.isEmpty() || params.typeName.contains(t.getTypeName())) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (! (classes.isEmpty() || instanceOfAny(t, classes)) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (! (regexes.isEmpty() || matchesAny(fieldName, regexes)) ) {
|
||||
if (! (params.fieldRegex.isEmpty() || matchesAny(fieldName, params.fieldRegex)) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -106,6 +106,13 @@ public abstract class FieldMutatingUpdateProcessorFactory
|
|||
public Set<String> typeName = Collections.emptySet();
|
||||
public Collection<String> typeClass = Collections.emptyList();
|
||||
public Collection<Pattern> fieldRegex = Collections.emptyList();
|
||||
public Boolean fieldNameMatchesSchemaField = null; // null => not specified
|
||||
|
||||
public boolean noSelectorsSpecified() {
|
||||
return typeClass.isEmpty() && typeName.isEmpty()
|
||||
&& fieldRegex.isEmpty() && fieldName.isEmpty()
|
||||
&& null == fieldNameMatchesSchemaField;
|
||||
}
|
||||
}
|
||||
|
||||
private SelectorParams inclusions = new SelectorParams();
|
||||
|
@ -121,7 +128,6 @@ public abstract class FieldMutatingUpdateProcessorFactory
|
|||
" inform(SolrCore) never called???");
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static SelectorParams parseSelectorParams(NamedList args) {
|
||||
SelectorParams params = new SelectorParams();
|
||||
|
||||
|
@ -145,13 +151,16 @@ public abstract class FieldMutatingUpdateProcessorFactory
|
|||
// resolve this into actual Class objects later
|
||||
params.typeClass = oneOrMany(args, "typeClass");
|
||||
|
||||
// getBooleanArg() returns null if the arg is not specified
|
||||
params.fieldNameMatchesSchemaField = getBooleanArg(args, "fieldNameMatchesSchemaField");
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handles common initialization related to source fields for
|
||||
* constructoring the FieldNameSelector to be used.
|
||||
* constructing the FieldNameSelector to be used.
|
||||
*
|
||||
* Will error if any unexpected init args are found, so subclasses should
|
||||
* remove any subclass-specific init args before calling this method.
|
||||
|
@ -195,25 +204,13 @@ public abstract class FieldMutatingUpdateProcessorFactory
|
|||
|
||||
selector =
|
||||
FieldMutatingUpdateProcessor.createFieldNameSelector
|
||||
(core.getResourceLoader(),
|
||||
core,
|
||||
inclusions.fieldName,
|
||||
inclusions.typeName,
|
||||
inclusions.typeClass,
|
||||
inclusions.fieldRegex,
|
||||
getDefaultSelector(core));
|
||||
(core.getResourceLoader(), core, inclusions, getDefaultSelector(core));
|
||||
|
||||
for (SelectorParams exc : exclusions) {
|
||||
selector = FieldMutatingUpdateProcessor.wrap
|
||||
(selector,
|
||||
FieldMutatingUpdateProcessor.createFieldNameSelector
|
||||
(core.getResourceLoader(),
|
||||
core,
|
||||
exc.fieldName,
|
||||
exc.typeName,
|
||||
exc.typeClass,
|
||||
exc.fieldRegex,
|
||||
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
|
||||
(core.getResourceLoader(), core, exc, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -270,7 +267,28 @@ public abstract class FieldMutatingUpdateProcessorFactory
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the first instance of the key from NamedList, returning the Boolean
|
||||
* that key referred to, or null if the key is not specified.
|
||||
* @exception SolrException invalid type or structure
|
||||
*/
|
||||
public static Boolean getBooleanArg(final NamedList args, final String key) {
|
||||
Boolean bool;
|
||||
List values = args.getAll(key);
|
||||
if (0 == values.size()) {
|
||||
return null;
|
||||
}
|
||||
if (values.size() > 1) {
|
||||
throw new SolrException(SERVER_ERROR, "Only one '" + key + "' is allowed");
|
||||
}
|
||||
Object o = args.remove(key);
|
||||
if (o instanceof Boolean) {
|
||||
bool = (Boolean)o;
|
||||
} else if (o instanceof CharSequence) {
|
||||
bool = Boolean.parseBoolean(o.toString());
|
||||
} else {
|
||||
throw new SolrException(SERVER_ERROR, "'" + key + "' must have type 'bool' or 'str'; found " + o.getClass());
|
||||
}
|
||||
return bool;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -241,6 +241,46 @@
|
|||
<processor class="solr.IgnoreFieldUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-not-in-schema-explicit-selector">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<bool name="fieldNameMatchesSchemaField">false</bool>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-in-schema">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<bool name="fieldNameMatchesSchemaField">true</bool>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-not-in-schema-and-foo-name-prefix">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">foo.*</str>
|
||||
<bool name="fieldNameMatchesSchemaField">false</bool>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-foo-name-prefix-except-not-schema">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">foo.*</str>
|
||||
<lst name="exclude">
|
||||
<bool name="fieldNameMatchesSchemaField">false</bool>
|
||||
</lst>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-not-in-schema-explicit-str-selector">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<str name="fieldNameMatchesSchemaField">false</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-in-schema-str-selector">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<str name="fieldNameMatchesSchemaField">true</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="ignore-some">
|
||||
<processor class="solr.IgnoreFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">.*_raw</str>
|
||||
|
|
|
@ -552,6 +552,8 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
|
|||
IndexSchema schema = h.getCore().getLatestSchema();
|
||||
assertNull("test expects 'foo_giberish' to not be a valid field, looks like schema was changed out from under us",
|
||||
schema.getFieldTypeNoEx("foo_giberish"));
|
||||
assertNull("test expects 'bar_giberish' to not be a valid field, looks like schema was changed out from under us",
|
||||
schema.getFieldTypeNoEx("bar_giberish"));
|
||||
assertNotNull("test expects 't_raw' to be a valid field, looks like schema was changed out from under us",
|
||||
schema.getFieldTypeNoEx("t_raw"));
|
||||
assertNotNull("test expects 'foo_s' to be a valid field, looks like schema was changed out from under us",
|
||||
|
@ -561,11 +563,13 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
|
|||
|
||||
d = processAdd("ignore-not-in-schema",
|
||||
doc(f("id", "1111"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
|
||||
assertNotNull(d);
|
||||
assertFalse(d.containsKey("bar_giberish"));
|
||||
assertFalse(d.containsKey("foo_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("t_raw"));
|
||||
|
@ -574,15 +578,98 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
|
|||
d = processAdd("ignore-some",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
|
||||
assertNotNull(d);
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("foo_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("bar_giberish"));
|
||||
assertFalse(d.containsKey("t_raw"));
|
||||
assertEquals("hoss", d.getFieldValue("foo_s"));
|
||||
|
||||
|
||||
d = processAdd("ignore-not-in-schema-explicit-selector",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
assertNotNull(d);
|
||||
assertFalse(d.containsKey("foo_giberish"));
|
||||
assertFalse(d.containsKey("bar_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("t_raw"));
|
||||
assertEquals("hoss", d.getFieldValue("foo_s"));
|
||||
|
||||
d = processAdd("ignore-not-in-schema-and-foo-name-prefix",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
assertNotNull(d);
|
||||
assertFalse(d.containsKey("foo_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("bar_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("t_raw"));
|
||||
assertEquals("hoss", d.getFieldValue("foo_s"));
|
||||
|
||||
d = processAdd("ignore-foo-name-prefix-except-not-schema",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
assertNotNull(d);
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("foo_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("bar_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("t_raw"));
|
||||
assertFalse(d.containsKey("foo_s"));
|
||||
|
||||
d = processAdd("ignore-in-schema",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
assertNotNull(d);
|
||||
assertTrue(d.containsKey("foo_giberish"));
|
||||
assertTrue(d.containsKey("bar_giberish"));
|
||||
assertFalse(d.containsKey("id"));
|
||||
assertFalse(d.containsKey("t_raw"));
|
||||
assertFalse(d.containsKey("foo_s"));
|
||||
|
||||
d = processAdd("ignore-not-in-schema-explicit-str-selector",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
assertNotNull(d);
|
||||
assertFalse(d.containsKey("foo_giberish"));
|
||||
assertFalse(d.containsKey("bar_giberish"));
|
||||
assertEquals(Arrays.asList("123456789", "", 42, "abcd"),
|
||||
d.getFieldValues("t_raw"));
|
||||
assertEquals("hoss", d.getFieldValue("foo_s"));
|
||||
|
||||
d = processAdd("ignore-in-schema-str-selector",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_giberish", "123456789", "", 42, "abcd"),
|
||||
f("bar_giberish", "123456789", "", 42, "abcd"),
|
||||
f("t_raw", "123456789", "", 42, "abcd"),
|
||||
f("foo_s", "hoss")));
|
||||
assertNotNull(d);
|
||||
assertTrue(d.containsKey("foo_giberish"));
|
||||
assertTrue(d.containsKey("bar_giberish"));
|
||||
assertFalse(d.containsKey("id"));
|
||||
assertFalse(d.containsKey("t_raw"));
|
||||
assertFalse(d.containsKey("foo_s"));
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue