From 8226476315091e6af255335ed28c3bbf027711dd Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Tue, 6 Jun 2006 18:53:04 +0000 Subject: [PATCH] pattern-match copyField source name: SOLR-21 git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@412190 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../org/apache/solr/schema/IndexSchema.java | 205 ++++++++++++++---- .../apache/solr/update/DocumentBuilder.java | 28 ++- .../org/apache/solr/ConvertedLegacyTest.java | 11 + src/test/test-files/solr/conf/schema.xml | 3 +- 5 files changed, 194 insertions(+), 55 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8a19109cbfe..4a676ddb7f0 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -15,6 +15,8 @@ New Features 8. Support for customizing the QueryResponseWriter per request (Mike Baranczak / SOLR-16 / hossman) 9. Added KeywordTokenizerFactory (hossman) +10. copyField accepts dynamicfield-like names as the source. + (Darren Erik Vengroff via yonik, SOLR-21) Changes in runtime behavior 1. classes reorganized into different packages, package names changed to Apache diff --git a/src/java/org/apache/solr/schema/IndexSchema.java b/src/java/org/apache/solr/schema/IndexSchema.java index df5ac3c47d4..87266292fc6 100644 --- a/src/java/org/apache/solr/schema/IndexSchema.java +++ b/src/java/org/apache/solr/schema/IndexSchema.java @@ -96,7 +96,7 @@ public final class IndexSchema { *

*/ public Map getFields() { return fields; } - + /** * Provides direct access to the Map containing all Field Types * in the index, keyed on fild type name. @@ -109,7 +109,7 @@ public final class IndexSchema { private Similarity similarity; - + /** * Returns the Similarity used for this index */ @@ -128,7 +128,7 @@ public final class IndexSchema { public Analyzer getAnalyzer() { return analyzer; } private Analyzer queryAnalyzer; - + /** * Returns the Analyzer used when searching this index * @@ -147,7 +147,7 @@ public final class IndexSchema { } private SchemaField uniqueKeyField; - + /** * Unique Key field specified in the schema file * @return null if this schema has no unique key field @@ -339,17 +339,7 @@ public final class IndexSchema { // OK, now sort the dynamic fields largest to smallest size so we don't get // any false matches. We want to act like a compiler tool and try and match // the largest string possible. - Collections.sort(dFields, new Comparator() { - public int compare(DynamicField a, DynamicField b) { - // swap natural ordering to get biggest first. - // The sort is stable, so elements of the same size should - // be - if (a.regex.length() < b.regex.length()) return 1; - else if (a.regex.length() > b.regex.length()) return -1; - return 0; - } - } - ); + Collections.sort(dFields); log.finest("Dynamic Field Ordering:" + dFields); @@ -388,6 +378,9 @@ public final class IndexSchema { /////////////// parse out copyField commands /////////////// // Map> cfields = new HashMap>(); // expression = "/schema/copyField"; + + ArrayList dCopies = new ArrayList(); + expression = "//copyField"; nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET); @@ -396,19 +389,33 @@ public final class IndexSchema { NamedNodeMap attrs = node.getAttributes(); String source = DOMUtil.getAttr(attrs,"source","copyField definition"); + + boolean sourceIsPattern = isWildCard(source); + String dest = DOMUtil.getAttr(attrs,"dest","copyField definition"); log.fine("copyField source='"+source+"' dest='"+dest+"'"); - SchemaField f = getField(source); SchemaField d = getField(dest); - SchemaField[] destArr = copyFields.get(source); - if (destArr==null) { - destArr=new SchemaField[]{d}; - } else { - destArr = (SchemaField[])append(destArr,d); - } - copyFields.put(source,destArr); - } + if(sourceIsPattern) { + dCopies.add(new DynamicCopy(source, d)); + } else { + // retrieve the field to force an exception if it doesn't exist + SchemaField f = getField(source); + + SchemaField[] destArr = copyFields.get(source); + if (destArr==null) { + destArr=new SchemaField[]{d}; + } else { + destArr = (SchemaField[])append(destArr,d); + } + copyFields.put(source,destArr); + } + } + + log.finest("Dynamic Copied Fields:" + dCopies); + + // stuff it in a normal array for faster access + dynamicCopyFields = (DynamicCopy[])dCopies.toArray(new DynamicCopy[dCopies.size()]); } catch (SolrException e) { throw e; @@ -493,24 +500,17 @@ public final class IndexSchema { } - // - // Instead of storing a type, this could be implemented as a hierarchy - // with a virtual matches(). - // Given how often a search will be done, however, speed is the overriding - // concern and I'm not sure which is faster. - // - final static class DynamicField { + static abstract class DynamicReplacement implements Comparable { final static int STARTS_WITH=1; final static int ENDS_WITH=2; final String regex; final int type; - final SchemaField prototype; final String str; - DynamicField(SchemaField prototype) { - this.regex=prototype.name; + protected DynamicReplacement(String regex) { + this.regex = regex; if (regex.startsWith("*")) { type=ENDS_WITH; str=regex.substring(1); @@ -522,15 +522,41 @@ public final class IndexSchema { else { throw new RuntimeException("dynamic field name must start or end with *"); } - this.prototype=prototype; } - boolean matches(String name) { + public boolean matches(String name) { if (type==STARTS_WITH && name.startsWith(str)) return true; else if (type==ENDS_WITH && name.endsWith(str)) return true; else return false; } + /** + * Sort order is based on length of regex. Longest comes first. + * @param other The object to compare to. + * @return a negative integer, zero, or a positive integer + * as this object is less than, equal to, or greater than + * the specified object. + */ + public int compareTo(DynamicReplacement other) { + return other.regex.length() - regex.length(); + } + } + + + // + // Instead of storing a type, this could be implemented as a hierarchy + // with a virtual matches(). + // Given how often a search will be done, however, speed is the overriding + // concern and I'm not sure which is faster. + // + final static class DynamicField extends DynamicReplacement { + final SchemaField prototype; + + DynamicField(SchemaField prototype) { + super(prototype.name); + this.prototype=prototype; + } + SchemaField makeSchemaField(String name) { // could have a cache instead of returning a new one each time, but it might // not be worth it. @@ -546,14 +572,72 @@ public final class IndexSchema { } + // + // Instead of storing a type, this could be implemented as a hierarchy + // with a virtual matches(). + // Given how often a search will be done, however, speed is the overriding + // concern and I'm not sure which is faster. + // + final static class DynamicCopy extends DynamicReplacement { + final SchemaField targetField; + DynamicCopy(String regex, SchemaField targetField) { + super(regex); + this.targetField = targetField; + } + + public String toString() { + return targetField.toString(); + } + } + private DynamicField[] dynamicFields; + /** - * Returns the SchemaField that should be used for the specified field name + * Does the schema have the specified field defined explicitly, i.e. + * not as a result of a copyField declaration with a wildcard? We + * consider it explicitly defined if it matches a field or dynamicField + * declaration. + * @param fieldName + * @return true if explicitly declared in the schema. + */ + public boolean hasExplicitField(String fieldName) { + if(fields.containsKey(fieldName)) { + return true; + } + + for (DynamicField df : dynamicFields) { + if (df.matches(fieldName)) return true; + } + + return false; + } + + /** + * Returns the SchemaField that should be used for the specified field name, or + * null if none exists. * - * @param fieldName may be an explicitly created field, or a name that - * excercies a dynamic field. + * @param fieldName may be an explicitly defined field, or a name that + * matches a dynamic field. + * @see #getFieldType + */ + public SchemaField getFieldOrNull(String fieldName) { + SchemaField f = fields.get(fieldName); + if (f != null) return f; + + for (DynamicField df : dynamicFields) { + if (df.matches(fieldName)) return df.makeSchemaField(fieldName); + } + + return f; + } + + /** + * Returns the SchemaField that should be used for the specified field name + * + * @param fieldName may be an explicitly defined field, or a name that + * matches a dynamic field. * @throws SolrException if no such field exists * @see #getFieldType */ @@ -641,13 +725,48 @@ public final class IndexSchema { private final Map copyFields = new HashMap(); - + private DynamicCopy[] dynamicCopyFields; + /** - * Returns the list of fields that should recieve a copy of any indexed values added to the specified field. - * @return may be null or empty if there are no matching copyField directives + * Get all copy fields, both the static and the dynamic ones. + * @param sourceField + * @return Array of fields to copy to. */ public SchemaField[] getCopyFields(String sourceField) { - return copyFields.get(sourceField); + // Get the dynamic ones into a list. + List matchCopyFields = new ArrayList(); + + for(DynamicCopy dynamicCopy : dynamicCopyFields) { + if(dynamicCopy.matches(sourceField)) { + matchCopyFields.add(dynamicCopy.targetField); + } + } + + // Get the fixed ones, if there are any. + SchemaField[] fixedCopyFields = copyFields.get(sourceField); + + boolean appendFixed = copyFields.containsKey(sourceField); + + // Construct the results by concatenating dynamic and fixed into a results array. + + SchemaField[] results = new SchemaField[matchCopyFields.size() + (appendFixed ? fixedCopyFields.length : 0)]; + + matchCopyFields.toArray(results); + + if(appendFixed) { + System.arraycopy(fixedCopyFields, 0, results, matchCopyFields.size(), fixedCopyFields.length); + } + + return results; + } + + /** + * Is the given field name a wildcard? I.e. does it begin or end with *? + * @param name + * @return true/false + */ + private static boolean isWildCard(String name) { + return name.startsWith("*") || name.endsWith("*"); } } diff --git a/src/java/org/apache/solr/update/DocumentBuilder.java b/src/java/org/apache/solr/update/DocumentBuilder.java index e1983c4d729..3211dcf5e50 100644 --- a/src/java/org/apache/solr/update/DocumentBuilder.java +++ b/src/java/org/apache/solr/update/DocumentBuilder.java @@ -68,25 +68,31 @@ public class DocumentBuilder { public void addField(SchemaField sfield, String val, float boost) { addSingleField(sfield,val,boost); + } + + public void addField(String name, String val) { + addField(name, val, 1.0f); + } + + public void addField(String name, String val, float boost) { + SchemaField sfield = schema.getFieldOrNull(name); + if (sfield != null) { + addField(sfield,val,boost); + } // Check if we should copy this field to any other fields. - SchemaField[] destArr = schema.getCopyFields(sfield.getName()); + // This could happen whether it is explicit or not. + SchemaField[] destArr = schema.getCopyFields(name); if (destArr != null) { for (SchemaField destField : destArr) { addSingleField(destField,val,boost); } } - } - public void addField(String name, String val) { - SchemaField ftype = schema.getField(name); - // fields.get(name); - addField(ftype,val,1.0f); - } - - public void addField(String name, String val, float boost) { - SchemaField ftype = schema.getField(name); - addField(ftype,val,boost); + // error if this field name doesn't match anything + if (sfield==null && (destArr==null || destArr.length==0)) { + throw new SolrException(400,"ERROR:unknown field '" + name + "'"); + } } public void setBoost(float boost) { diff --git a/src/test/org/apache/solr/ConvertedLegacyTest.java b/src/test/org/apache/solr/ConvertedLegacyTest.java index 805814f3be6..e62a94aaa50 100644 --- a/src/test/org/apache/solr/ConvertedLegacyTest.java +++ b/src/test/org/apache/solr/ConvertedLegacyTest.java @@ -808,6 +808,17 @@ public class ConvertedLegacyTest extends AbstractSolrTestCase { ,"*[count(//doc)=1]" ); + // test copyField functionality with a pattern. + + assertU("42Copy me to the text field pretty please."); + assertU(""); + assertQ(req("id:42 AND text:pretty") + ,"*[count(//doc)=1]" + ); + assertQ(req("id:42 AND copy_t:pretty") + ,"*[count(//doc)=1]" + ); + // test slop assertU("42foo bar"); diff --git a/src/test/test-files/solr/conf/schema.xml b/src/test/test-files/solr/conf/schema.xml index 45a8ea3a607..479dc1d1758 100644 --- a/src/test/test-files/solr/conf/schema.xml +++ b/src/test/test-files/solr/conf/schema.xml @@ -340,7 +340,8 @@ - + +