SOLR-4567: copyField source glob matching explicit field(s) stopped working in Solr 4.2

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1455832 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2013-03-13 08:01:50 +00:00
parent 806bdbe834
commit 651a15492b
6 changed files with 95 additions and 29 deletions

View File

@ -77,6 +77,9 @@ Bug Fixes
* SOLR-4538: Date Math expressions were being truncated to 32 characters
when used in field:value queries in the lucene QParser. (hossman, yonik)
* SOLR-4567: copyField source glob matching explicit field(s) stopped working
in Solr 4.2. (Alexandre Rafalovitch, Steve Rowe)
Other Changes

View File

@ -34,7 +34,10 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
/**
* This class responds to requests at /solr/(corename)/schema/copyfields
@ -63,9 +66,10 @@ public class CopyFieldCollectionResource extends BaseFieldResource implements GE
private static final String MAX_CHARS = "maxChars";
private static final String SOURCE_DYNAMIC_BASE = "sourceDynamicBase";
private static final String DESTINATION_DYNAMIC_BASE = "destDynamicBase";
private static final String SOURCE_EXPLICIT_FIELDS = "sourceExplicitFields";
private Set<String> sourceFields;
private Set<String> destinationFields;
private Set<String> requestedSourceFields;
private Set<String> requestedDestinationFields;
public CopyFieldCollectionResource() {
super();
@ -79,16 +83,16 @@ public class CopyFieldCollectionResource extends BaseFieldResource implements GE
if (null != sourceFieldListParam) {
String[] fields = sourceFieldListParam.trim().split("[,\\s]+");
if (fields.length > 0) {
sourceFields = new HashSet<String>(Arrays.asList(fields));
sourceFields.remove(""); // Remove empty values, if any
requestedSourceFields = new HashSet<String>(Arrays.asList(fields));
requestedSourceFields.remove(""); // Remove empty values, if any
}
}
String destinationFieldListParam = getSolrRequest().getParams().get(DESTINATION_FIELD_LIST);
if (null != destinationFieldListParam) {
String[] fields = destinationFieldListParam.trim().split("[,\\s]+");
if (fields.length > 0) {
destinationFields = new HashSet<String>(Arrays.asList(fields));
destinationFields.remove(""); // Remove empty values, if any
requestedDestinationFields = new HashSet<String>(Arrays.asList(fields));
requestedDestinationFields.remove(""); // Remove empty values, if any
}
}
}
@ -98,8 +102,7 @@ public class CopyFieldCollectionResource extends BaseFieldResource implements GE
public Representation get() {
try {
final List<SimpleOrderedMap<Object>> props = new ArrayList<SimpleOrderedMap<Object>>();
SortedMap<String,List<CopyField>> sortedCopyFields
= new TreeMap<String, List<CopyField>>(getSchema().getCopyFieldsMap());
SortedMap<String,List<CopyField>> sortedCopyFields = new TreeMap<String, List<CopyField>>(getSchema().getCopyFieldsMap());
for (List<CopyField> copyFields : sortedCopyFields.values()) {
Collections.sort(copyFields, new Comparator<CopyField>() {
@Override
@ -111,8 +114,8 @@ public class CopyFieldCollectionResource extends BaseFieldResource implements GE
for (CopyField copyField : copyFields) {
final String source = copyField.getSource().getName();
final String destination = copyField.getDestination().getName();
if ( (null == sourceFields || sourceFields.contains(source))
&& (null == destinationFields || destinationFields.contains(destination))) {
if ( (null == requestedSourceFields || requestedSourceFields.contains(source))
&& (null == requestedDestinationFields || requestedDestinationFields.contains(destination))) {
SimpleOrderedMap<Object> copyFieldProps = new SimpleOrderedMap<Object>();
copyFieldProps.add(SOURCE, source);
copyFieldProps.add(DESTINATION, destination);
@ -126,14 +129,26 @@ public class CopyFieldCollectionResource extends BaseFieldResource implements GE
for (IndexSchema.DynamicCopy dynamicCopy : getSchema().getDynamicCopyFields()) {
final String source = dynamicCopy.getRegex();
final String destination = dynamicCopy.getDestFieldName();
if ( (null == sourceFields || sourceFields.contains(source))
&& (null == destinationFields || destinationFields.contains(destination))) {
if ( (null == requestedSourceFields || requestedSourceFields.contains(source))
&& (null == requestedDestinationFields || requestedDestinationFields.contains(destination))) {
SimpleOrderedMap<Object> dynamicCopyProps = new SimpleOrderedMap<Object>();
dynamicCopyProps.add(SOURCE, dynamicCopy.getRegex());
IndexSchema.DynamicField sourceDynamicBase = dynamicCopy.getSourceDynamicBase();
if (null != sourceDynamicBase) {
dynamicCopyProps.add(SOURCE_DYNAMIC_BASE, sourceDynamicBase.getRegex());
} else if (source.contains("*")) {
List<String> sourceExplicitFields = new ArrayList<String>();
Pattern pattern = Pattern.compile(source.replace("*", ".*")); // glob->regex
for (String field : getSchema().getFields().keySet()) {
if (pattern.matcher(field).matches()) {
sourceExplicitFields.add(field);
}
}
if (sourceExplicitFields.size() > 0) {
Collections.sort(sourceExplicitFields);
dynamicCopyProps.add(SOURCE_EXPLICIT_FIELDS, sourceExplicitFields);
}
}
dynamicCopyProps.add(DESTINATION, dynamicCopy.getDestFieldName());

View File

@ -55,6 +55,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
* <code>IndexSchema</code> contains information about the valid fields in an index
@ -420,7 +421,7 @@ public final class IndexSchema {
requiredFields.add(f);
}
} else if (node.getNodeName().equals("dynamicField")) {
if (isValidDynamicFieldName(name)) {
if (isValidFieldGlob(name)) {
// make sure nothing else has the same path
addDynamicField(dFields, f);
} else {
@ -588,7 +589,7 @@ public final class IndexSchema {
}
/** Returns true if the given name has exactly one asterisk either at the start or end of the name */
private boolean isValidDynamicFieldName(String name) {
private static boolean isValidFieldGlob(String name) {
if (name.startsWith("*") || name.endsWith("*")) {
int count = 0;
for (int pos = 0 ; pos < name.length() && -1 != (pos = name.indexOf('*', pos)) ; ++pos) ++count;
@ -660,11 +661,22 @@ public final class IndexSchema {
DynamicField destDynamicBase = null;
boolean sourceIsDynamicFieldReference = false;
boolean sourceIsExplicitFieldGlob = false;
if (null == destSchemaField || null == sourceSchemaField) {
if (null == sourceSchemaField && isValidFieldGlob(source)) {
Pattern pattern = Pattern.compile(source.replace("*", ".*")); // glob->regex
for (String field : fields.keySet()) {
if (pattern.matcher(field).matches()) {
sourceIsExplicitFieldGlob = true;
break;
}
}
}
if (null == destSchemaField || (null == sourceSchemaField && ! sourceIsExplicitFieldGlob)) {
// Go through dynamicFields array only once, collecting info for both source and dest fields, if needed
for (DynamicField dynamicField : dynamicFields) {
if (null == sourceSchemaField && ! sourceIsDynamicFieldReference) {
if (null == sourceSchemaField && ! sourceIsDynamicFieldReference && ! sourceIsExplicitFieldGlob) {
if (dynamicField.matches(source)) {
sourceIsDynamicFieldReference = true;
if ( ! source.equals(dynamicField.getRegex())) {
@ -682,19 +694,22 @@ public final class IndexSchema {
destDynamicBase = dynamicField;
}
}
if (null != destSchemaField && (null != sourceSchemaField || sourceIsDynamicFieldReference)) break;
if (null != destSchemaField
&& (null != sourceSchemaField || sourceIsDynamicFieldReference || sourceIsExplicitFieldGlob)) {
break;
}
}
}
if (null == sourceSchemaField && ! sourceIsDynamicFieldReference) {
String msg = "copyField source :'" + source + "' is not an explicit field and doesn't match a dynamicField.";
if (null == sourceSchemaField && ! sourceIsDynamicFieldReference && ! sourceIsExplicitFieldGlob) {
String msg = "copyField source :'" + source + "' doesn't match any explicit field or dynamicField.";
throw new SolrException(ErrorCode.SERVER_ERROR, msg);
}
if (null == destSchemaField) {
String msg = "copyField dest :'" + dest + "' is not an explicit field and doesn't match a dynamicField.";
throw new SolrException(ErrorCode.SERVER_ERROR, msg);
}
if (sourceIsDynamicFieldReference) {
if (null != destDynamicField) { // source & dest: dynamic field references
if (sourceIsDynamicFieldReference || sourceIsExplicitFieldGlob) {
if (null != destDynamicField) { // source: dynamic field ref or explicit field glob; dest: dynamic field ref
registerDynamicCopyField(new DynamicCopy(source, destDynamicField, maxChars, sourceDynamicBase, destDynamicBase));
incrementCopyFieldTargetCount(destSchemaField);
} else { // source: dynamic field reference; dest: explicit field
@ -710,7 +725,7 @@ public final class IndexSchema {
incrementCopyFieldTargetCount(destSchemaField);
} else {
String msg = "copyField only supports a dynamic destination with an asterisk "
+ "if the source is also dynamic with an asterisk";
+ "if the source also has an asterisk";
throw new SolrException(ErrorCode.SERVER_ERROR, msg);
}
} else { // source & dest: explicit fields

View File

@ -460,7 +460,13 @@
<copyField source="text_fr" dest="highlight" maxChars="25" />
<copyField source="text_en" dest="highlight" maxChars="25" />
<copyField source="text_*" dest="highlight" maxChars="25" />
<!-- test source glob matching multiple explicit fields: sku1 and sku2 -->
<copyField source="sku*" dest="text"/>
<copyField source="sku*" dest="*_s"/>
<copyField source="sku*" dest="*_dest_sub_s"/>
<copyField source="sku*" dest="dest_sub_no_ast_s"/>
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine
for most applications.

View File

@ -616,4 +616,11 @@
<copyField source="src_sub_no_ast_i" dest="*_s"/>
<copyField source="src_sub_no_ast_i" dest="*_dest_sub_s"/>
<copyField source="src_sub_no_ast_i" dest="dest_sub_no_ast_s"/>
<!-- test source glob matching multiple explicit fields: title_stemmed and title_lettertok -->
<copyField source="title_*" dest="text"/>
<copyField source="title_*" dest="*_s"/>
<copyField source="title_*" dest="*_dest_sub_s"/>
<copyField source="title_*" dest="dest_sub_no_ast_s"/>
</schema>

View File

@ -78,7 +78,27 @@ public class TestCopyFieldCollectionResource extends SchemaRestletTestBase {
"/response/arr[@name='copyfields']/lst[ str[@name='source'][.='src_sub_no_ast_i']"
+" and str[@name='sourceDynamicBase'][.='*_i']"
+" and str[@name='dest'][.='dest_sub_no_ast_s']"
+" and str[@name='destDynamicBase'][.='*_s']]");
+" and str[@name='destDynamicBase'][.='*_s']]",
"/response/arr[@name='copyfields']/lst[ str[@name='source'][.='title_*']"
+" and arr[@name='sourceExplicitFields']/str[.='title_stemmed']"
+" and arr[@name='sourceExplicitFields']/str[.='title_lettertok']"
+" and str[@name='dest'][.='text']]",
"/response/arr[@name='copyfields']/lst[ str[@name='source'][.='title_*']"
+" and arr[@name='sourceExplicitFields']/str[.='title_stemmed']"
+" and arr[@name='sourceExplicitFields']/str[.='title_lettertok']"
+" and str[@name='dest'][.='*_s']]",
"/response/arr[@name='copyfields']/lst[ str[@name='source'][.='title_*']"
+" and arr[@name='sourceExplicitFields']/str[.='title_stemmed']"
+" and arr[@name='sourceExplicitFields']/str[.='title_lettertok']"
+" and str[@name='dest'][.='*_dest_sub_s']]",
"/response/arr[@name='copyfields']/lst[ str[@name='source'][.='title_*']"
+" and arr[@name='sourceExplicitFields']/str[.='title_stemmed']"
+" and arr[@name='sourceExplicitFields']/str[.='title_lettertok']"
+" and str[@name='dest'][.='dest_sub_no_ast_s']]");
}
@Test
@ -116,11 +136,11 @@ public class TestCopyFieldCollectionResource extends SchemaRestletTestBase {
@Test
public void testRestrictDest() throws Exception {
assertQ("/schema/copyfields/?indent=on&wt=xml&dest.fl=title,*_s,*_dest_sub_s,dest_sub_no_ast_s",
"count(/response/arr[@name='copyfields']/lst) = 13", // 3 + 3 + 3 + 4
"count(/response/arr[@name='copyfields']/lst) = 16", // 3 + 4 + 4 + 5
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='title']) = 3",
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='*_s']) = 3",
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='*_dest_sub_s']) = 3",
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='dest_sub_no_ast_s']) = 4");
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='*_s']) = 4",
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='*_dest_sub_s']) = 4",
"count(/response/arr[@name='copyfields']/lst/str[@name='dest'][.='dest_sub_no_ast_s']) = 5");
}
@Test