diff --git a/CHANGES.txt b/CHANGES.txt index 15e5f3f5cf0..76360ef9e67 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -94,6 +94,10 @@ New Features 18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities. Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers) +19. SOLR-538: Add maxChars attribute for copyField function so that the length limit for destination + can be specified. + (Georgios Stamatis, Lars Kotthoff, Chris Harris via koji) + Optimizations ---------------------- 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the diff --git a/src/java/org/apache/solr/schema/CopyField.java b/src/java/org/apache/solr/schema/CopyField.java new file mode 100644 index 00000000000..372cb4f496b --- /dev/null +++ b/src/java/org/apache/solr/schema/CopyField.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +/** + * CopyField contains all the information of a valid copy field in an index. 
+ * + * @since solr 1.4 + */ +public class CopyField { + + private final SchemaField source; + private final SchemaField destination; + private final int maxChars; + public static final int UNLIMITED = 0; + + public CopyField(final SchemaField source, final SchemaField destination) { + this(source, destination, UNLIMITED); + } + + /** + * @param source The SchemaField of the source field. + * @param destination The SchemaField of the destination field. + * @param maxChars Maximum number of chars in source field to copy to destination field. + * If equal to 0, there is no limit. + */ + public CopyField(final SchemaField source, final SchemaField destination, + final int maxChars) { + if (source == null || destination == null) { + throw new IllegalArgumentException( + "Source or Destination SchemaField can't be NULL."); + } + if (maxChars < 0) { + throw new IllegalArgumentException( + "Attribute maxChars can't have a negative value."); + } + this.source = source; + this.destination = destination; + this.maxChars = maxChars; + } + + public String getLimitedValue( final String val ){ + return maxChars == UNLIMITED || val.length() < maxChars ? + val : val.substring( 0, maxChars ); + } + + /** + * @return source SchemaField + */ + public SchemaField getSource() { + return source; + } + + /** + * @return destination SchemaField + */ + public SchemaField getDestination() { + return destination; + } + + /** + * @return the maximum number of chars in source field to copy to destination field. 
+ */ + public int getMaxChars() { + return maxChars; + } +} diff --git a/src/java/org/apache/solr/schema/IndexSchema.java b/src/java/org/apache/solr/schema/IndexSchema.java index c12ec766d2d..5b53ba015bf 100644 --- a/src/java/org/apache/solr/schema/IndexSchema.java +++ b/src/java/org/apache/solr/schema/IndexSchema.java @@ -621,8 +621,18 @@ public final class IndexSchema { String source = DOMUtil.getAttr(attrs,"source","copyField definition"); String dest = DOMUtil.getAttr(attrs,"dest", "copyField definition"); + String maxChars = DOMUtil.getAttr(attrs, "maxChars"); + int maxCharsInt = CopyField.UNLIMITED; + if (maxChars != null) { + try { + maxCharsInt = Integer.parseInt(maxChars); + } catch (NumberFormatException e) { + log.warn("Couldn't parse maxChars attribute for copyField from " + + source + " to " + dest + " as integer. The whole field will be copied."); + } + } - registerCopyField(source, dest); + registerCopyField(source, dest, maxCharsInt); } for (Map.Entry entry : copyFieldTargetCounts.entrySet()) { @@ -646,6 +656,11 @@ public final class IndexSchema { refreshAnalyzers(); } + public void registerCopyField( String source, String dest ) + { + registerCopyField(source, dest, CopyField.UNLIMITED); + } + /** *

* NOTE: this function is not thread safe. However, it is safe to use within the standard @@ -655,12 +670,12 @@ public final class IndexSchema { * * @see SolrCoreAware */ - public void registerCopyField( String source, String dest ) + public void registerCopyField( String source, String dest, int maxChars ) { boolean sourceIsPattern = isWildCard(source); boolean destIsPattern = isWildCard(dest); - log.debug("copyField source='"+source+"' dest='"+dest+"'"); + log.debug("copyField source='"+source+"' dest='"+dest+"' maxChars='"+maxChars); SchemaField d = getFieldOrNull(dest); if(d == null){ throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField destination :'"+dest+"' does not exist" ); @@ -678,10 +693,10 @@ public final class IndexSchema { if( df == null ) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField dynamic destination must match a dynamicField." ); } - registerDynamicCopyField(new DynamicDestCopy(source, df )); + registerDynamicCopyField(new DynamicDestCopy(source, df, maxChars )); } else { - registerDynamicCopyField(new DynamicCopy(source, d)); + registerDynamicCopyField(new DynamicCopy(source, d, maxChars)); } } else if( destIsPattern ) { @@ -692,13 +707,12 @@ public final class IndexSchema { // retrieve the field to force an exception if it doesn't exist SchemaField f = getField(source); - SchemaField[] destArr = copyFields.get(source); - if (destArr==null) { - destArr=new SchemaField[]{d}; - } else { - destArr = (SchemaField[])append(destArr,d); + List copyFieldList = copyFieldsMap.get(source); + if (copyFieldList == null) { + copyFieldList = new ArrayList(); + copyFieldsMap.put(source, copyFieldList); } - copyFields.put(source,destArr); + copyFieldList.add(new CopyField(f, d, maxChars)); copyFieldTargetCounts.put(d, (copyFieldTargetCounts.containsKey(d) ? 
copyFieldTargetCounts.get(d) + 1 : 1)); } @@ -894,9 +908,16 @@ public final class IndexSchema { static class DynamicCopy extends DynamicReplacement { final SchemaField targetField; + final int maxChars; + DynamicCopy(String regex, SchemaField targetField) { + this(regex, targetField, CopyField.UNLIMITED); + } + + DynamicCopy(String regex, SchemaField targetField, int maxChars) { super(regex); this.targetField = targetField; + this.maxChars = maxChars; } public SchemaField getTargetField( String sourceField ) @@ -918,7 +939,11 @@ public final class IndexSchema { final String dstr; DynamicDestCopy(String source, DynamicField dynamic) { - super(source, dynamic.prototype ); + this(source, dynamic, CopyField.UNLIMITED); + } + + DynamicDestCopy(String source, DynamicField dynamic, int maxChars) { + super(source, dynamic.prototype, maxChars); this.dynamic = dynamic; String dest = dynamic.regex; @@ -1098,7 +1123,7 @@ public final class IndexSchema { }; - private final Map copyFields = new HashMap(); + private final Map> copyFieldsMap = new HashMap>(); private DynamicCopy[] dynamicCopyFields; /** * keys are all fields copied to, count is num of copyField @@ -1119,46 +1144,69 @@ public final class IndexSchema { return new SchemaField[0]; } List sf = new ArrayList(); - for (Map.Entry cfs : copyFields.entrySet()) { - for (SchemaField cf : cfs.getValue()) { - if (cf.getName().equals(destField)) { - sf.add(getField(cfs.getKey())); + for (Map.Entry> cfs : copyFieldsMap.entrySet()) { + for (CopyField copyField : cfs.getValue()) { + if (copyField.getDestination().getName().equals(destField)) { + sf.add(copyField.getSource()); } } } - return sf.toArray(new SchemaField[1]); + return sf.toArray(new SchemaField[sf.size()]); } /** * Get all copy fields, both the static and the dynamic ones. + * * @param sourceField * @return Array of fields to copy to. + * @deprecated Use {@link #getCopyFieldsList(String)} instead. 
*/ + @Deprecated public SchemaField[] getCopyFields(String sourceField) { - // Get the dynamic ones into a list. + // This is the List that holds all the results, dynamic or not. List matchCopyFields = new ArrayList(); + // Get the dynamic results into the list. for(DynamicCopy dynamicCopy : dynamicCopyFields) { if(dynamicCopy.matches(sourceField)) { matchCopyFields.add(dynamicCopy.getTargetField(sourceField)); } } - // Get the fixed ones, if there are any. - SchemaField[] fixedCopyFields = copyFields.get(sourceField); - - boolean appendFixed = copyFields.containsKey(sourceField); - - // Construct the results by concatenating dynamic and fixed into a results array. - - SchemaField[] results = new SchemaField[matchCopyFields.size() + (appendFixed ? fixedCopyFields.length : 0)]; - - matchCopyFields.toArray(results); - - if(appendFixed) { - System.arraycopy(fixedCopyFields, 0, results, matchCopyFields.size(), fixedCopyFields.length); + // Get the fixed ones, if there are any and add them. + final List copyFields = copyFieldsMap.get(sourceField); + if (copyFields!=null) { + final Iterator it = copyFields.iterator(); + while (it.hasNext()) { + matchCopyFields.add(it.next().getDestination()); + } } - return results; + // Construct the results by transforming the list into an array. + return matchCopyFields.toArray(new SchemaField[matchCopyFields.size()]); + } + + /** + * Get all copy fields for a specified source field, both static + * and dynamic ones. + * @param sourceField + * @return List of CopyFields to copy to. 
+ * @since solr 1.4 + */ + // This is useful when we need the maxChars param of each CopyField + public List getCopyFieldsList(final String sourceField){ + final List result = new ArrayList(); + for (DynamicCopy dynamicCopy : dynamicCopyFields) { + if (dynamicCopy.matches(sourceField)) { + result.add(new CopyField(getField(sourceField), dynamicCopy.getTargetField(sourceField), dynamicCopy.maxChars)); + } + } + List fixedCopyFields = copyFieldsMap.get(sourceField); + if (fixedCopyFields != null) + { + result.addAll(fixedCopyFields); + } + + return result; } /** diff --git a/src/java/org/apache/solr/update/DocumentBuilder.java b/src/java/org/apache/solr/update/DocumentBuilder.java index 8be1076870e..0cf698f89c1 100644 --- a/src/java/org/apache/solr/update/DocumentBuilder.java +++ b/src/java/org/apache/solr/update/DocumentBuilder.java @@ -29,6 +29,7 @@ import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; +import org.apache.solr.schema.CopyField; import org.apache.solr.schema.DateField; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -120,15 +121,15 @@ public class DocumentBuilder { // Check if we should copy this field to any other fields. // This could happen whether it is explicit or not. 
- SchemaField[] destArr = schema.getCopyFields(name); - if (destArr != null) { - for (SchemaField destField : destArr) { - addSingleField(destField,val,boost); + final List copyFields = schema.getCopyFieldsList(name); + if (copyFields != null) { + for(CopyField cf : copyFields) { + addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost); } } // error if this field name doesn't match anything - if (sfield==null && (destArr==null || destArr.length==0)) { + if (sfield==null && (copyFields==null || copyFields.size()==0)) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'"); } } @@ -218,7 +219,7 @@ public class DocumentBuilder { sfield.getName() + ": " +field.getValue() ); } - SchemaField[] destArr = schema.getCopyFields(name); + final List copyFields = schema.getCopyFieldsList(name); // load each field value boolean hasField = false; @@ -246,8 +247,10 @@ public class DocumentBuilder { } } - // Add the copy fields - for( SchemaField sf : destArr ) { + // Check if we should copy this field to any other fields. + // This could happen whether it is explicit or not. + for( CopyField cf : copyFields ) { + SchemaField sf = cf.getDestination(); // check if the copy field is a multivalued or not if( !sf.multiValued() && out.get( sf.getName() ) != null ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, @@ -256,7 +259,7 @@ public class DocumentBuilder { } used = true; - Field f = sf.createField( val, boost ); + Field f = sf.createField( cf.getLimitedValue( val ), boost ); if( f != null ) { // null fields are not added out.add( f ); } diff --git a/src/test/org/apache/solr/schema/CopyFieldTest.java b/src/test/org/apache/solr/schema/CopyFieldTest.java new file mode 100644 index 00000000000..192f0f2723d --- /dev/null +++ b/src/test/org/apache/solr/schema/CopyFieldTest.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.util.AbstractSolrTestCase; +import org.junit.Test; + +/** + * This is a simple test to make sure the CopyField works. + * It uses its own special schema file. 
+ * + * @since solr 1.4 + */ +public class CopyFieldTest extends AbstractSolrTestCase { + + @Override + public String getSchemaFile() { + return "schema-copyfield-test.xml"; + } + + @Override + public String getSolrConfigFile() { + return "solrconfig.xml"; + } + + @Override + public void setUp() throws Exception { + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + } + + @Test + public void testCopyFieldSchemaFieldSchemaField() { + try { + new CopyField(new SchemaField("source", new TextField()), null); + fail("CopyField failed with null SchemaField argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains("can't be NULL")); + } + try { + new CopyField(null, new SchemaField("destination", new TextField())); + fail("CopyField failed with null SchemaField argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains("can't be NULL")); + } + try { + new CopyField(null, null); + fail("CopyField failed with null SchemaField argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains("can't be NULL")); + } + } + + @Test + public void testCopyFieldSchemaFieldSchemaFieldInt() { + try { + new CopyField(null, + new SchemaField("destination", new TextField()), 1000); + fail("CopyField failed with null SchemaField argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains("can't be NULL")); + } + try { + new CopyField(new SchemaField("source", new TextField()), null, + 1000); + fail("CopyField failed with null SchemaField argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains("can't be NULL")); + } + try { + new CopyField(null, null, 1000); + fail("CopyField failed with null SchemaField argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains("can't be NULL")); + } + try { + new 
CopyField(new SchemaField("source", new TextField()), + new SchemaField("destination", new TextField()), -1000); + fail("CopyField failed with negative length argument."); + } catch (IllegalArgumentException e) { + assertTrue(e.getLocalizedMessage().contains( + "can't have a negative value")); + } + new CopyField(new SchemaField("source", new TextField()), + new SchemaField("destination", new TextField()), CopyField.UNLIMITED); + } + + @Test + public void testGetSource() { + final CopyField copyField = new CopyField(new SchemaField("source", + new TextField()), new SchemaField("destination", + new TextField()), 1000); + assertEquals("source", copyField.getSource().name); + } + + @Test + public void testGetDestination() { + final CopyField copyField = new CopyField(new SchemaField("source", + new TextField()), new SchemaField("destination", + new TextField()), 1000); + assertEquals("destination", copyField.getDestination().name); + } + + @Test + public void testGetMaxChars() { + final CopyField copyField = new CopyField(new SchemaField("source", + new TextField()), new SchemaField("destination", + new TextField()), 1000); + assertEquals(1000, copyField.getMaxChars()); + } + + @Test + public void testCopyFieldFunctionality() + { + SolrCore core = h.getCore(); + assertU(adoc("id", "10", "title", "test copy field", "text_en", "this is a simple test of the copy field functionality")); + assertU(commit()); + + Map args = new HashMap(); + args.put( CommonParams.Q, "text_en:simple" ); + args.put( "indent", "true" ); + SolrQueryRequest req = new LocalSolrQueryRequest( core, new MapSolrParams( args) ); + + assertQ("Make sure they got in", req + ,"//*[@numFound='1']" + ,"//result/doc[1]/int[@name='id'][.='10']" + ); + + args = new HashMap(); + args.put( CommonParams.Q, "highlight:simple" ); + args.put( "indent", "true" ); + req = new LocalSolrQueryRequest( core, new MapSolrParams( args) ); + assertQ("dynamic source", req + ,"//*[@numFound='1']" + 
,"//result/doc[1]/int[@name='id'][.='10']" + ,"//result/doc[1]/arr[@name='highlight']/str[.='this is a simple test of ']" + ); + + args = new HashMap(); + args.put( CommonParams.Q, "text_en:functionality" ); + args.put( "indent", "true" ); + req = new LocalSolrQueryRequest( core, new MapSolrParams( args) ); + assertQ("Make sure they got in", req + ,"//*[@numFound='1']"); + + args = new HashMap(); + args.put( CommonParams.Q, "highlight:functionality" ); + args.put( "indent", "true" ); + req = new LocalSolrQueryRequest( core, new MapSolrParams( args) ); + assertQ("dynamic source", req + ,"//*[@numFound='0']"); + } +} diff --git a/src/test/test-files/solr/conf/schema-copyfield-test.xml b/src/test/test-files/solr/conf/schema-copyfield-test.xml new file mode 100644 index 00000000000..a03b2f1f1e3 --- /dev/null +++ b/src/test/test-files/solr/conf/schema-copyfield-test.xml @@ -0,0 +1,468 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + + + + + + + + + + + + + + + + + + + + + +