diff --git a/CHANGES.txt b/CHANGES.txt index 307a7d741f0..76e3d054a4e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -30,7 +30,7 @@ Detailed Change List New Features 1. SOLR-69: Adding MoreLikeThisHandler to search for similar documents using - lucene contrib/queries MoreLikeThis. MoreLikeThis is also avaliable from + lucene contrib/queries MoreLikeThis. MoreLikeThis is also available from the StandardRequestHandler using ?mlt=true. (bdelacretaz, ryan) 2. SOLR-253: Adding KeepWordFilter and KeepWordFilterFactory. A TokenFilter @@ -54,6 +54,10 @@ New Features also includes tests that start jetty and test a connection using the full HTTP request cycle. (Darren Erik Vengroff, Will Johnson, ryan) + 7. SOLR-262: Added toObject( Fieldable ) to FieldType. This lets the + DocumentBuilder convert lucene Document to a SolrDocument. This patch + also lets the DocumentBuilder convert a SolrInputDocument to a lucene + Document. (ryan) Changes in runtime behavior diff --git a/src/java/org/apache/solr/common/SolrInputDocument.java b/src/java/org/apache/solr/common/SolrInputDocument.java index 47e392e0509..25a541f55e1 100644 --- a/src/java/org/apache/solr/common/SolrInputDocument.java +++ b/src/java/org/apache/solr/common/SolrInputDocument.java @@ -17,7 +17,10 @@ package org.apache.solr.common; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.Map; /** @@ -25,6 +28,9 @@ import java.util.Map; * a Lucene Document. Like the SolrDocument, the field values need to * match those specified in schema.xml * + * By default, this will keep every field value added to the document. To only + * keep distinct values, use setKeepDuplicateFieldValues( "fieldname", false); + * * @author ryan * @version $Id$ * @since solr 1.3 @@ -32,6 +38,22 @@ import java.util.Map; public class SolrInputDocument extends SolrDocument { private Map _boost = null; + private Map _keepDuplicates = null; + + /** + * Return a base collection to manage the fields for a given value. If + * the field is defined to be "distinct", the field will be backed as + * a Set rather then a List. Adding the same value multiple times will + * only keep a single instance of that value. + */ + @Override + protected Collection getEmptyCollection( String name ) + { + if( _keepDuplicates == null || Boolean.TRUE == _keepDuplicates.get( name )) { + return new ArrayList(); + } + return new LinkedHashSet(); // keep the order? -- perhaps HashSet? + } /** * Remove all fields and boosts from the document @@ -43,6 +65,9 @@ public class SolrInputDocument extends SolrDocument if( _boost != null ) { _boost.clear(); } + if(_keepDuplicates != null ) { + _keepDuplicates.clear(); + } } /** @@ -88,4 +113,34 @@ public class SolrInputDocument extends SolrDocument } return _boost.get( name ); } + + + /** + * Should the Document be able to contain duplicate values for the same field? + * + * By default, all field values are maintained. If you only want to distinct values + * set setKeepDuplicateFieldValues( "fieldname", false ); + * + * To change the default behavior, use null as the fieldname. + * + * NOTE: this must be called before adding any values to the given field. + */ + public void setKeepDuplicateFieldValues( String name, boolean v ) + { + if( this.getFieldValues( name ) != null ) { + // If it was not distinct and changed to distinct, we could, but this seems like a better rule + throw new RuntimeException( "You can't change a fields distinctness after it is initialized." ); + } + + if( _keepDuplicates == null ) { + if( v == true ) { + // we only care about 'false' we don't need to make a map unless + // something does not want multiple values + return; + } + _keepDuplicates = new HashMap(); + } + _keepDuplicates.put( name, v ); + } + } diff --git a/src/java/org/apache/solr/schema/BCDIntField.java b/src/java/org/apache/solr/schema/BCDIntField.java index 181df07cdd9..b2267244c42 100644 --- a/src/java/org/apache/solr/schema/BCDIntField.java +++ b/src/java/org/apache/solr/schema/BCDIntField.java @@ -43,12 +43,19 @@ public class BCDIntField extends FieldType { } public String toInternal(String val) { + // TODO? make sure each character is a digit? return BCDUtils.base10toBase10kSortableInt(val); } public String toExternal(Fieldable f) { return indexedToReadable(f.stringValue()); } + + // Note, this can't return type 'Integer' because BCDStrField and BCDLong extend it + @Override + public Object toObject(Fieldable f) { + return Integer.valueOf( toExternal(f) ); + } public String indexedToReadable(String indexedForm) { return BCDUtils.base10kSortableIntToBase10(indexedForm); @@ -64,3 +71,6 @@ public class BCDIntField extends FieldType { } + + + diff --git a/src/java/org/apache/solr/schema/BCDLongField.java b/src/java/org/apache/solr/schema/BCDLongField.java index 398d39b6a2f..297bc1ea99c 100644 --- a/src/java/org/apache/solr/schema/BCDLongField.java +++ b/src/java/org/apache/solr/schema/BCDLongField.java @@ -29,4 +29,9 @@ public class BCDLongField extends BCDIntField { public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException { xmlWriter.writeLong(name,toExternal(f)); } + + @Override + public Long toObject(Fieldable f) { + return Long.valueOf( toExternal(f) ); + } } diff --git a/src/java/org/apache/solr/schema/BCDStrField.java b/src/java/org/apache/solr/schema/BCDStrField.java index 4b211e3221c..0531f31b212 100644 --- a/src/java/org/apache/solr/schema/BCDStrField.java +++ b/src/java/org/apache/solr/schema/BCDStrField.java @@ -29,4 +29,13 @@ public class BCDStrField extends BCDIntField { public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException { xmlWriter.writeStr(name,toExternal(f)); } + + /** + * This only works for strings that represent an interger. If the string + * is not an integer, it will not survive the base10k conversion! + */ + @Override + public String toObject(Fieldable f) { + return toExternal(f); + } } diff --git a/src/java/org/apache/solr/schema/BoolField.java b/src/java/org/apache/solr/schema/BoolField.java index e2aa1f602e6..61b35dad02e 100644 --- a/src/java/org/apache/solr/schema/BoolField.java +++ b/src/java/org/apache/solr/schema/BoolField.java @@ -89,6 +89,11 @@ public class BoolField extends FieldType { return indexedToReadable(f.stringValue()); } + @Override + public Boolean toObject(Fieldable f) { + return Boolean.valueOf( toExternal(f) ); + } + public String indexedToReadable(String indexedForm) { char ch = indexedForm.charAt(0); return ch=='T' ? "true" : "false"; diff --git a/src/java/org/apache/solr/schema/DateField.java b/src/java/org/apache/solr/schema/DateField.java index 31a498d72e7..60ac74fcd33 100644 --- a/src/java/org/apache/solr/schema/DateField.java +++ b/src/java/org/apache/solr/schema/DateField.java @@ -120,6 +120,16 @@ public class DateField extends FieldType { return indexedToReadable(f.stringValue()); } + @Override + public Date toObject(Fieldable f) { + try { + return getThreadLocalDateFormat().parse( toExternal(f) ); + } + catch( ParseException ex ) { + throw new RuntimeException( ex ); + } + } + public SortField getSortField(SchemaField field,boolean reverse) { return getStringSort(field,reverse); } diff --git a/src/java/org/apache/solr/schema/DoubleField.java b/src/java/org/apache/solr/schema/DoubleField.java index 323a914d399..701542e698e 100644 --- a/src/java/org/apache/solr/schema/DoubleField.java +++ b/src/java/org/apache/solr/schema/DoubleField.java @@ -53,4 +53,10 @@ public class DoubleField extends FieldType { public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { writer.writeDouble(name, f.stringValue()); } + + + @Override + public Double toObject(Fieldable f) { + return Double.valueOf( toExternal(f) ); + } } diff --git a/src/java/org/apache/solr/schema/FieldType.java b/src/java/org/apache/solr/schema/FieldType.java index 437cac6e4c4..bc2c14cc82c 100644 --- a/src/java/org/apache/solr/schema/FieldType.java +++ b/src/java/org/apache/solr/schema/FieldType.java @@ -235,6 +235,15 @@ public abstract class FieldType extends FieldProperties { return f.stringValue(); } + /** + * Convert the stored-field format to an external object. + * @see #toInternal + * @since solr 1.3 + */ + public Object toObject(Fieldable f) { + return toExternal(f); // by default use the string + } + /** :TODO: document this method */ public String indexedToReadable(String indexedForm) { return indexedForm; diff --git a/src/java/org/apache/solr/schema/FloatField.java b/src/java/org/apache/solr/schema/FloatField.java index b68c2c590a3..bb91e25101e 100644 --- a/src/java/org/apache/solr/schema/FloatField.java +++ b/src/java/org/apache/solr/schema/FloatField.java @@ -50,4 +50,9 @@ public class FloatField extends FieldType { public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { writer.writeFloat(name, f.stringValue()); } + + @Override + public Float toObject(Fieldable f) { + return Float.valueOf( toExternal(f) ); + } } diff --git a/src/java/org/apache/solr/schema/IndexSchema.java b/src/java/org/apache/solr/schema/IndexSchema.java index 84bf3363331..85cd51df36f 100644 --- a/src/java/org/apache/solr/schema/IndexSchema.java +++ b/src/java/org/apache/solr/schema/IndexSchema.java @@ -539,6 +539,7 @@ public final class IndexSchema { destArr = (SchemaField[])append(destArr,d); } copyFields.put(source,destArr); + copyFieldTarget.add( d ); } } @@ -898,6 +899,7 @@ public final class IndexSchema { private final Map copyFields = new HashMap(); private DynamicCopy[] dynamicCopyFields; + private final Set copyFieldTarget = new HashSet(); /** * Get all copy fields, both the static and the dynamic ones. @@ -931,6 +933,16 @@ public final class IndexSchema { return results; } + + /** + * Check if a field is used as the destination of a copyField operation + * + * @since solr 1.3 + */ + public boolean isCopyFieldTarget( SchemaField f ) + { + return copyFieldTarget.contains( f ); + } /** * Is the given field name a wildcard? I.e. does it begin or end with *? @@ -948,3 +960,4 @@ public final class IndexSchema { + diff --git a/src/java/org/apache/solr/schema/IntField.java b/src/java/org/apache/solr/schema/IntField.java index 0a9eb1aa02c..31cf18cc71b 100644 --- a/src/java/org/apache/solr/schema/IntField.java +++ b/src/java/org/apache/solr/schema/IntField.java @@ -50,4 +50,9 @@ public class IntField extends FieldType { public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { writer.writeInt(name, f.stringValue()); } + + @Override + public Integer toObject(Fieldable f) { + return Integer.valueOf( toExternal(f) ); + } } diff --git a/src/java/org/apache/solr/schema/LongField.java b/src/java/org/apache/solr/schema/LongField.java index fb31d937f9e..c0b33fc9dd7 100644 --- a/src/java/org/apache/solr/schema/LongField.java +++ b/src/java/org/apache/solr/schema/LongField.java @@ -55,4 +55,9 @@ public class LongField extends FieldType { public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { writer.writeLong(name, f.stringValue()); } + + @Override + public Long toObject(Fieldable f) { + return Long.valueOf( toExternal(f) ); + } } diff --git a/src/java/org/apache/solr/schema/SortableDoubleField.java b/src/java/org/apache/solr/schema/SortableDoubleField.java index ba2cb978144..5ff1ea53cd1 100644 --- a/src/java/org/apache/solr/schema/SortableDoubleField.java +++ b/src/java/org/apache/solr/schema/SortableDoubleField.java @@ -54,6 +54,11 @@ public class SortableDoubleField extends FieldType { return indexedToReadable(f.stringValue()); } + @Override + public Double toObject(Fieldable f) { + return Double.valueOf( toExternal(f) ); + } + public String indexedToReadable(String indexedForm) { return NumberUtils.SortableStr2doubleStr(indexedForm); } @@ -137,3 +142,6 @@ class SortableDoubleFieldSource extends FieldCacheSource { } + + + diff --git a/src/java/org/apache/solr/schema/SortableFloatField.java b/src/java/org/apache/solr/schema/SortableFloatField.java index f6c8736fb17..3db8e119a2b 100644 --- a/src/java/org/apache/solr/schema/SortableFloatField.java +++ b/src/java/org/apache/solr/schema/SortableFloatField.java @@ -54,6 +54,11 @@ public class SortableFloatField extends FieldType { return indexedToReadable(f.stringValue()); } + @Override + public Float toObject(Fieldable f) { + return Float.valueOf( toExternal(f) ); + } + public String indexedToReadable(String indexedForm) { return NumberUtils.SortableStr2floatStr(indexedForm); } @@ -134,3 +139,6 @@ class SortableFloatFieldSource extends FieldCacheSource { }; } + + + diff --git a/src/java/org/apache/solr/schema/SortableIntField.java b/src/java/org/apache/solr/schema/SortableIntField.java index 1c1e6d54132..df1f2bf8baf 100644 --- a/src/java/org/apache/solr/schema/SortableIntField.java +++ b/src/java/org/apache/solr/schema/SortableIntField.java @@ -61,6 +61,11 @@ public class SortableIntField extends FieldType { return NumberUtils.SortableStr2int(indexedForm); } + @Override + public Integer toObject(Fieldable f) { + return Integer.valueOf( toExternal(f) ); + } + public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException { String sval = f.stringValue(); // since writeInt an int instead of a String since that may be more efficient diff --git a/src/java/org/apache/solr/schema/SortableLongField.java b/src/java/org/apache/solr/schema/SortableLongField.java index 4c543a6a3fa..1e8ea160c21 100644 --- a/src/java/org/apache/solr/schema/SortableLongField.java +++ b/src/java/org/apache/solr/schema/SortableLongField.java @@ -58,6 +58,11 @@ public class SortableLongField extends FieldType { return indexedToReadable(f.stringValue()); } + @Override + public Long toObject(Fieldable f) { + return Long.valueOf( toExternal(f) ); + } + public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException { String sval = f.stringValue(); xmlWriter.writeLong(name, NumberUtils.SortableStr2long(sval,0,sval.length())); diff --git a/src/java/org/apache/solr/update/DocumentBuilder.java b/src/java/org/apache/solr/update/DocumentBuilder.java index dd4a3f0f284..82cab3380d4 100644 --- a/src/java/org/apache/solr/update/DocumentBuilder.java +++ b/src/java/org/apache/solr/update/DocumentBuilder.java @@ -18,12 +18,17 @@ package org.apache.solr.update; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.schema.DateField; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -63,6 +68,12 @@ public class DocumentBuilder { + ": first='" + oldValue + "' second='" + val + "'"); } } + + if( doc == null ) { + throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, + "must call startDoc() before adding fields!" ); + } + // field.setBoost(boost); doc.add(field); } @@ -140,4 +151,65 @@ public class DocumentBuilder { Document ret = doc; doc=null; return ret; } + + /** + * Build a lucene document from a SolrInputDocument + * + * @since solr 1.3 + */ + public Document build( SolrInputDocument doc ) + { + this.startDoc(); + + for( String name : doc.getFieldNames() ) { + Float boost = doc.getBoost( name ); + if( boost == null ) { + boost = new Float( 1 ); + } + + for( Object v : doc.getFieldValues( name ) ) { + if( v instanceof Date ) { + // Make sure to format dates + SchemaField sfield = schema.getField(name); + if( sfield.getType() instanceof DateField ) { + DateField df = (DateField)sfield.getType(); + this.addField( name, df.toInternal( (Date)v )+'Z', boost ); + continue; + } + } + this.addField( name, v==null ? null : v.toString(), boost ); + } + } + + // set the full document boost + Document luceneDoc = this.getDoc(); + if( doc.getBoost( null ) != null ) { + luceneDoc.setBoost( doc.getBoost( null ) ); + } + return luceneDoc; + } + + /** + * Add fields from the solr document + * + * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. + * Something somewhere needs to be able to fill up a SolrDocument from + * a lucene document - this is one place that may happen. It may also be + * moved to an independent function + * + * @since solr 1.3 + */ + public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc ) + { + for( Object f : luceneDoc.getFields() ) { + Fieldable field = (Fieldable)f; + if( field.isStored() ) { + SchemaField sf = schema.getField( field.name() ); + if( !schema.isCopyFieldTarget( sf ) ) { + doc.addField( field.name(), sf.getType().toObject( field ) ); + } + } + } + return doc; + } } diff --git a/src/test/org/apache/solr/common/SolrDocumentTest.java b/src/test/org/apache/solr/common/SolrDocumentTest.java index f9f1d184448..4105eaa6da7 100644 --- a/src/test/org/apache/solr/common/SolrDocumentTest.java +++ b/src/test/org/apache/solr/common/SolrDocumentTest.java @@ -121,7 +121,56 @@ public class SolrDocumentTest extends TestCase doc.addField( "v", c0 ); assertEquals( arr.length, doc.getFieldValues("v").size() ); } + + public void testOrderedDistinctFields() + { + List c0 = new ArrayList(); + c0.add( "aaa" ); + c0.add( "bbb" ); + c0.add( "aaa" ); + c0.add( "aaa" ); + c0.add( "ccc" ); + + SolrInputDocument doc = new SolrInputDocument(); + doc.setKeepDuplicateFieldValues( null, false ); + doc.addField( "v", c0 ); + assertEquals( 3, doc.getFieldValues("v").size() ); + + assertEquals( "[aaa, bbb, ccc]", doc.getFieldValues( "v" ).toString() ); + } + + public void testDuplicate() + { + Float fval0 = new Float( 10.01f ); + Float fval1 = new Float( 11.01f ); + Float fval2 = new Float( 12.01f ); + + // Set up a simple document + SolrInputDocument doc = new SolrInputDocument(); + for( int i=0; i<5; i++ ) { + doc.addField( "f", fval0 ); + doc.addField( "f", fval1 ); + doc.addField( "f", fval2 ); + } + assertEquals( (3*5), doc.getFieldValues("f").size() ); + + try { + doc.setKeepDuplicateFieldValues( "f", false ); + fail( "can't change distinct for an existing field" ); + } + catch( Exception ex ) {} + + doc.removeFields( "f" ); + doc.setKeepDuplicateFieldValues( "f", false ); + for( int i=0; i<5; i++ ) { + doc.addField( "f", fval0 ); + doc.addField( "f", fval1 ); + doc.addField( "f", fval2 ); + } + assertEquals( (3), doc.getFieldValues("f").size() ); + } } +