mirror of https://github.com/apache/lucene.git
pattern-match copyField source name: SOLR-21
git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@412190 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f4219196f
commit
8226476315
|
@ -15,6 +15,8 @@ New Features
|
|||
8. Support for customizing the QueryResponseWriter per request
|
||||
(Mike Baranczak / SOLR-16 / hossman)
|
||||
9. Added KeywordTokenizerFactory (hossman)
|
||||
10. copyField accepts dynamicfield-like names as the source.
|
||||
(Darren Erik Vengroff via yonik, SOLR-21)
|
||||
|
||||
Changes in runtime behavior
|
||||
1. classes reorganized into different packages, package names changed to Apache
|
||||
|
|
|
@ -96,7 +96,7 @@ public final class IndexSchema {
|
|||
* </p>
|
||||
*/
|
||||
public Map<String,SchemaField> getFields() { return fields; }
|
||||
|
||||
|
||||
/**
|
||||
* Provides direct access to the Map containing all Field Types
|
||||
* in the index, keyed on fild type name.
|
||||
|
@ -109,7 +109,7 @@ public final class IndexSchema {
|
|||
|
||||
|
||||
private Similarity similarity;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the Similarity used for this index
|
||||
*/
|
||||
|
@ -128,7 +128,7 @@ public final class IndexSchema {
|
|||
public Analyzer getAnalyzer() { return analyzer; }
|
||||
|
||||
private Analyzer queryAnalyzer;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the Analyzer used when searching this index
|
||||
*
|
||||
|
@ -147,7 +147,7 @@ public final class IndexSchema {
|
|||
}
|
||||
|
||||
private SchemaField uniqueKeyField;
|
||||
|
||||
|
||||
/**
|
||||
* Unique Key field specified in the schema file
|
||||
* @return null if this schema has no unique key field
|
||||
|
@ -339,17 +339,7 @@ public final class IndexSchema {
|
|||
// OK, now sort the dynamic fields largest to smallest size so we don't get
|
||||
// any false matches. We want to act like a compiler tool and try and match
|
||||
// the largest string possible.
|
||||
Collections.sort(dFields, new Comparator<DynamicField>() {
|
||||
public int compare(DynamicField a, DynamicField b) {
|
||||
// swap natural ordering to get biggest first.
|
||||
// The sort is stable, so elements of the same size should
|
||||
// be
|
||||
if (a.regex.length() < b.regex.length()) return 1;
|
||||
else if (a.regex.length() > b.regex.length()) return -1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
);
|
||||
Collections.sort(dFields);
|
||||
|
||||
log.finest("Dynamic Field Ordering:" + dFields);
|
||||
|
||||
|
@ -388,6 +378,9 @@ public final class IndexSchema {
|
|||
/////////////// parse out copyField commands ///////////////
|
||||
// Map<String,ArrayList<SchemaField>> cfields = new HashMap<String,ArrayList<SchemaField>>();
|
||||
// expression = "/schema/copyField";
|
||||
|
||||
ArrayList<DynamicCopy> dCopies = new ArrayList<DynamicCopy>();
|
||||
|
||||
expression = "//copyField";
|
||||
nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET);
|
||||
|
||||
|
@ -396,19 +389,33 @@ public final class IndexSchema {
|
|||
NamedNodeMap attrs = node.getAttributes();
|
||||
|
||||
String source = DOMUtil.getAttr(attrs,"source","copyField definition");
|
||||
|
||||
boolean sourceIsPattern = isWildCard(source);
|
||||
|
||||
String dest = DOMUtil.getAttr(attrs,"dest","copyField definition");
|
||||
log.fine("copyField source='"+source+"' dest='"+dest+"'");
|
||||
SchemaField f = getField(source);
|
||||
SchemaField d = getField(dest);
|
||||
SchemaField[] destArr = copyFields.get(source);
|
||||
if (destArr==null) {
|
||||
destArr=new SchemaField[]{d};
|
||||
} else {
|
||||
destArr = (SchemaField[])append(destArr,d);
|
||||
}
|
||||
copyFields.put(source,destArr);
|
||||
}
|
||||
|
||||
if(sourceIsPattern) {
|
||||
dCopies.add(new DynamicCopy(source, d));
|
||||
} else {
|
||||
// retrieve the field to force an exception if it doesn't exist
|
||||
SchemaField f = getField(source);
|
||||
|
||||
SchemaField[] destArr = copyFields.get(source);
|
||||
if (destArr==null) {
|
||||
destArr=new SchemaField[]{d};
|
||||
} else {
|
||||
destArr = (SchemaField[])append(destArr,d);
|
||||
}
|
||||
copyFields.put(source,destArr);
|
||||
}
|
||||
}
|
||||
|
||||
log.finest("Dynamic Copied Fields:" + dCopies);
|
||||
|
||||
// stuff it in a normal array for faster access
|
||||
dynamicCopyFields = (DynamicCopy[])dCopies.toArray(new DynamicCopy[dCopies.size()]);
|
||||
|
||||
} catch (SolrException e) {
|
||||
throw e;
|
||||
|
@ -493,24 +500,17 @@ public final class IndexSchema {
|
|||
}
|
||||
|
||||
|
||||
//
|
||||
// Instead of storing a type, this could be implemented as a hierarchy
|
||||
// with a virtual matches().
|
||||
// Given how often a search will be done, however, speed is the overriding
|
||||
// concern and I'm not sure which is faster.
|
||||
//
|
||||
final static class DynamicField {
|
||||
static abstract class DynamicReplacement implements Comparable<DynamicReplacement> {
|
||||
final static int STARTS_WITH=1;
|
||||
final static int ENDS_WITH=2;
|
||||
|
||||
final String regex;
|
||||
final int type;
|
||||
final SchemaField prototype;
|
||||
|
||||
final String str;
|
||||
|
||||
DynamicField(SchemaField prototype) {
|
||||
this.regex=prototype.name;
|
||||
protected DynamicReplacement(String regex) {
|
||||
this.regex = regex;
|
||||
if (regex.startsWith("*")) {
|
||||
type=ENDS_WITH;
|
||||
str=regex.substring(1);
|
||||
|
@ -522,15 +522,41 @@ public final class IndexSchema {
|
|||
else {
|
||||
throw new RuntimeException("dynamic field name must start or end with *");
|
||||
}
|
||||
this.prototype=prototype;
|
||||
}
|
||||
|
||||
boolean matches(String name) {
|
||||
public boolean matches(String name) {
|
||||
if (type==STARTS_WITH && name.startsWith(str)) return true;
|
||||
else if (type==ENDS_WITH && name.endsWith(str)) return true;
|
||||
else return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort order is based on length of regex. Longest comes first.
|
||||
* @param other The object to compare to.
|
||||
* @return a negative integer, zero, or a positive integer
|
||||
* as this object is less than, equal to, or greater than
|
||||
* the specified object.
|
||||
*/
|
||||
public int compareTo(DynamicReplacement other) {
|
||||
return other.regex.length() - regex.length();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Instead of storing a type, this could be implemented as a hierarchy
|
||||
// with a virtual matches().
|
||||
// Given how often a search will be done, however, speed is the overriding
|
||||
// concern and I'm not sure which is faster.
|
||||
//
|
||||
final static class DynamicField extends DynamicReplacement {
|
||||
final SchemaField prototype;
|
||||
|
||||
DynamicField(SchemaField prototype) {
|
||||
super(prototype.name);
|
||||
this.prototype=prototype;
|
||||
}
|
||||
|
||||
SchemaField makeSchemaField(String name) {
|
||||
// could have a cache instead of returning a new one each time, but it might
|
||||
// not be worth it.
|
||||
|
@ -546,14 +572,72 @@ public final class IndexSchema {
|
|||
}
|
||||
|
||||
|
||||
//
|
||||
// Instead of storing a type, this could be implemented as a hierarchy
|
||||
// with a virtual matches().
|
||||
// Given how often a search will be done, however, speed is the overriding
|
||||
// concern and I'm not sure which is faster.
|
||||
//
|
||||
final static class DynamicCopy extends DynamicReplacement {
|
||||
final SchemaField targetField;
|
||||
DynamicCopy(String regex, SchemaField targetField) {
|
||||
super(regex);
|
||||
this.targetField = targetField;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return targetField.toString();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private DynamicField[] dynamicFields;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the SchemaField that should be used for the specified field name
|
||||
* Does the schema have the specified field defined explicitly, i.e.
|
||||
* not as a result of a copyField declaration with a wildcard? We
|
||||
* consider it explicitly defined if it matches a field or dynamicField
|
||||
* declaration.
|
||||
* @param fieldName
|
||||
* @return true if explicitly declared in the schema.
|
||||
*/
|
||||
public boolean hasExplicitField(String fieldName) {
|
||||
if(fields.containsKey(fieldName)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (DynamicField df : dynamicFields) {
|
||||
if (df.matches(fieldName)) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the SchemaField that should be used for the specified field name, or
|
||||
* null if none exists.
|
||||
*
|
||||
* @param fieldName may be an explicitly created field, or a name that
|
||||
* excercies a dynamic field.
|
||||
* @param fieldName may be an explicitly defined field, or a name that
|
||||
* matches a dynamic field.
|
||||
* @see #getFieldType
|
||||
*/
|
||||
public SchemaField getFieldOrNull(String fieldName) {
|
||||
SchemaField f = fields.get(fieldName);
|
||||
if (f != null) return f;
|
||||
|
||||
for (DynamicField df : dynamicFields) {
|
||||
if (df.matches(fieldName)) return df.makeSchemaField(fieldName);
|
||||
}
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the SchemaField that should be used for the specified field name
|
||||
*
|
||||
* @param fieldName may be an explicitly defined field, or a name that
|
||||
* matches a dynamic field.
|
||||
* @throws SolrException if no such field exists
|
||||
* @see #getFieldType
|
||||
*/
|
||||
|
@ -641,13 +725,48 @@ public final class IndexSchema {
|
|||
|
||||
|
||||
private final Map<String, SchemaField[]> copyFields = new HashMap<String,SchemaField[]>();
|
||||
|
||||
private DynamicCopy[] dynamicCopyFields;
|
||||
|
||||
/**
|
||||
* Returns the list of fields that should recieve a copy of any indexed values added to the specified field.
|
||||
* @return may be null or empty if there are no matching copyField directives
|
||||
* Get all copy fields, both the static and the dynamic ones.
|
||||
* @param sourceField
|
||||
* @return Array of fields to copy to.
|
||||
*/
|
||||
public SchemaField[] getCopyFields(String sourceField) {
|
||||
return copyFields.get(sourceField);
|
||||
// Get the dynamic ones into a list.
|
||||
List<SchemaField> matchCopyFields = new ArrayList<SchemaField>();
|
||||
|
||||
for(DynamicCopy dynamicCopy : dynamicCopyFields) {
|
||||
if(dynamicCopy.matches(sourceField)) {
|
||||
matchCopyFields.add(dynamicCopy.targetField);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the fixed ones, if there are any.
|
||||
SchemaField[] fixedCopyFields = copyFields.get(sourceField);
|
||||
|
||||
boolean appendFixed = copyFields.containsKey(sourceField);
|
||||
|
||||
// Construct the results by concatenating dynamic and fixed into a results array.
|
||||
|
||||
SchemaField[] results = new SchemaField[matchCopyFields.size() + (appendFixed ? fixedCopyFields.length : 0)];
|
||||
|
||||
matchCopyFields.toArray(results);
|
||||
|
||||
if(appendFixed) {
|
||||
System.arraycopy(fixedCopyFields, 0, results, matchCopyFields.size(), fixedCopyFields.length);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the given field name a wildcard? I.e. does it begin or end with *?
|
||||
* @param name
|
||||
* @return true/false
|
||||
*/
|
||||
private static boolean isWildCard(String name) {
|
||||
return name.startsWith("*") || name.endsWith("*");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -68,25 +68,31 @@ public class DocumentBuilder {
|
|||
|
||||
public void addField(SchemaField sfield, String val, float boost) {
|
||||
addSingleField(sfield,val,boost);
|
||||
}
|
||||
|
||||
public void addField(String name, String val) {
|
||||
addField(name, val, 1.0f);
|
||||
}
|
||||
|
||||
public void addField(String name, String val, float boost) {
|
||||
SchemaField sfield = schema.getFieldOrNull(name);
|
||||
if (sfield != null) {
|
||||
addField(sfield,val,boost);
|
||||
}
|
||||
|
||||
// Check if we should copy this field to any other fields.
|
||||
SchemaField[] destArr = schema.getCopyFields(sfield.getName());
|
||||
// This could happen whether it is explicit or not.
|
||||
SchemaField[] destArr = schema.getCopyFields(name);
|
||||
if (destArr != null) {
|
||||
for (SchemaField destField : destArr) {
|
||||
addSingleField(destField,val,boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void addField(String name, String val) {
|
||||
SchemaField ftype = schema.getField(name);
|
||||
// fields.get(name);
|
||||
addField(ftype,val,1.0f);
|
||||
}
|
||||
|
||||
public void addField(String name, String val, float boost) {
|
||||
SchemaField ftype = schema.getField(name);
|
||||
addField(ftype,val,boost);
|
||||
// error if this field name doesn't match anything
|
||||
if (sfield==null && (destArr==null || destArr.length==0)) {
|
||||
throw new SolrException(400,"ERROR:unknown field '" + name + "'");
|
||||
}
|
||||
}
|
||||
|
||||
public void setBoost(float boost) {
|
||||
|
|
|
@ -808,6 +808,17 @@ public class ConvertedLegacyTest extends AbstractSolrTestCase {
|
|||
,"*[count(//doc)=1]"
|
||||
);
|
||||
|
||||
// test copyField functionality with a pattern.
|
||||
|
||||
assertU("<add><doc><field name=\"id\">42</field><field name=\"copy_t\">Copy me to the text field pretty please.</field></doc></add>");
|
||||
assertU("<commit/>");
|
||||
assertQ(req("id:42 AND text:pretty")
|
||||
,"*[count(//doc)=1]"
|
||||
);
|
||||
assertQ(req("id:42 AND copy_t:pretty")
|
||||
,"*[count(//doc)=1]"
|
||||
);
|
||||
|
||||
// test slop
|
||||
|
||||
assertU("<add><doc><field name=\"id\">42</field><field name=\"text\">foo bar</field></doc></add>");
|
||||
|
|
|
@ -340,7 +340,8 @@
|
|||
<copyField source="title" dest="text"/>
|
||||
<copyField source="subject" dest="text"/>
|
||||
|
||||
|
||||
<copyField source="*_t" dest="text"/>
|
||||
|
||||
<!-- Similarity is the scoring routine for each document vs a query.
|
||||
A custom similarity may be specified here, but the default is fine
|
||||
for most applications.
|
||||
|
|
Loading…
Reference in New Issue