SOLR-262 -- Added toObject( Fieldable ) to FieldType. This lets the DocumentBuilder convert lucene Document to a SolrDocument. This patch also lets the DocumentBuilder convert a SolrInputDocument to a lucene Document.

The semantics of document conversion is not totally agreed upon yet.  This is commited so we have something to work with for other patches.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@547493 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2007-06-15 02:27:48 +00:00
parent ecdbcc62d5
commit 04e7c94b99
19 changed files with 289 additions and 1 deletions

View File

@ -30,7 +30,7 @@ Detailed Change List
New Features
1. SOLR-69: Adding MoreLikeThisHandler to search for similar documents using
lucene contrib/queries MoreLikeThis. MoreLikeThis is also avaliable from
lucene contrib/queries MoreLikeThis. MoreLikeThis is also available from
the StandardRequestHandler using ?mlt=true. (bdelacretaz, ryan)
2. SOLR-253: Adding KeepWordFilter and KeepWordFilterFactory. A TokenFilter
@ -54,6 +54,10 @@ New Features
also includes tests that start jetty and test a connection using the full
HTTP request cycle. (Darren Erik Vengroff, Will Johnson, ryan)
7. SOLR-262: Added toObject( Fieldable ) to FieldType. This lets the
DocumentBuilder convert lucene Document to a SolrDocument. This patch
also lets the DocumentBuilder convert a SolrInputDocument to a lucene
Document. (ryan)
Changes in runtime behavior

View File

@ -17,7 +17,10 @@
package org.apache.solr.common;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
/**
@ -25,6 +28,9 @@ import java.util.Map;
* a Lucene Document. Like the SolrDocument, the field values need to
* match those specified in schema.xml
*
* By default, this will keep every field value added to the document. To only
* keep distinct values, use setKeepDuplicateFieldValues( "fieldname", false);
*
* @author ryan
* @version $Id$
* @since solr 1.3
@ -32,6 +38,22 @@ import java.util.Map;
public class SolrInputDocument extends SolrDocument
{
private Map<String,Float> _boost = null;
private Map<String,Boolean> _keepDuplicates = null;
/**
* Return a base collection to manage the fields for a given value. If
* the field is defined to be "distinct", the field will be backed as
* a Set rather then a List. Adding the same value multiple times will
* only keep a single instance of that value.
*/
@Override
protected Collection<Object> getEmptyCollection( String name )
{
if( _keepDuplicates == null || Boolean.TRUE == _keepDuplicates.get( name )) {
return new ArrayList<Object>();
}
return new LinkedHashSet<Object>(); // keep the order? -- perhaps HashSet?
}
/**
* Remove all fields and boosts from the document
@ -43,6 +65,9 @@ public class SolrInputDocument extends SolrDocument
if( _boost != null ) {
_boost.clear();
}
if(_keepDuplicates != null ) {
_keepDuplicates.clear();
}
}
/**
@ -88,4 +113,34 @@ public class SolrInputDocument extends SolrDocument
}
return _boost.get( name );
}
/**
* Should the Document be able to contain duplicate values for the same field?
*
* By default, all field values are maintained. If you only want to distinct values
* set setKeepDuplicateFieldValues( "fieldname", false );
*
* To change the default behavior, use <code>null</code> as the fieldname.
*
* NOTE: this must be called before adding any values to the given field.
*/
public void setKeepDuplicateFieldValues( String name, boolean v )
{
if( this.getFieldValues( name ) != null ) {
// If it was not distinct and changed to distinct, we could, but this seems like a better rule
throw new RuntimeException( "You can't change a fields distinctness after it is initialized." );
}
if( _keepDuplicates == null ) {
if( v == true ) {
// we only care about 'false' we don't need to make a map unless
// something does not want multiple values
return;
}
_keepDuplicates = new HashMap<String, Boolean>();
}
_keepDuplicates.put( name, v );
}
}

View File

@ -43,6 +43,7 @@ public class BCDIntField extends FieldType {
}
public String toInternal(String val) {
// TODO? make sure each character is a digit?
return BCDUtils.base10toBase10kSortableInt(val);
}
@ -50,6 +51,12 @@ public class BCDIntField extends FieldType {
return indexedToReadable(f.stringValue());
}
// Note, this can't return type 'Integer' because BCDStrField and BCDLong extend it
@Override
public Object toObject(Fieldable f) {
return Integer.valueOf( toExternal(f) );
}
public String indexedToReadable(String indexedForm) {
return BCDUtils.base10kSortableIntToBase10(indexedForm);
}
@ -64,3 +71,6 @@ public class BCDIntField extends FieldType {
}

View File

@ -29,4 +29,9 @@ public class BCDLongField extends BCDIntField {
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
xmlWriter.writeLong(name,toExternal(f));
}
@Override
public Long toObject(Fieldable f) {
return Long.valueOf( toExternal(f) );
}
}

View File

@ -29,4 +29,13 @@ public class BCDStrField extends BCDIntField {
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
xmlWriter.writeStr(name,toExternal(f));
}
/**
* This only works for strings that represent an interger. If the string
* is not an integer, it will not survive the base10k conversion!
*/
@Override
public String toObject(Fieldable f) {
return toExternal(f);
}
}

View File

@ -89,6 +89,11 @@ public class BoolField extends FieldType {
return indexedToReadable(f.stringValue());
}
@Override
public Boolean toObject(Fieldable f) {
return Boolean.valueOf( toExternal(f) );
}
public String indexedToReadable(String indexedForm) {
char ch = indexedForm.charAt(0);
return ch=='T' ? "true" : "false";

View File

@ -120,6 +120,16 @@ public class DateField extends FieldType {
return indexedToReadable(f.stringValue());
}
@Override
public Date toObject(Fieldable f) {
try {
return getThreadLocalDateFormat().parse( toExternal(f) );
}
catch( ParseException ex ) {
throw new RuntimeException( ex );
}
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}

View File

@ -53,4 +53,10 @@ public class DoubleField extends FieldType {
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeDouble(name, f.stringValue());
}
@Override
public Double toObject(Fieldable f) {
return Double.valueOf( toExternal(f) );
}
}

View File

@ -235,6 +235,15 @@ public abstract class FieldType extends FieldProperties {
return f.stringValue();
}
/**
* Convert the stored-field format to an external object.
* @see #toInternal
* @since solr 1.3
*/
public Object toObject(Fieldable f) {
return toExternal(f); // by default use the string
}
/** :TODO: document this method */
public String indexedToReadable(String indexedForm) {
return indexedForm;

View File

@ -50,4 +50,9 @@ public class FloatField extends FieldType {
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeFloat(name, f.stringValue());
}
@Override
public Float toObject(Fieldable f) {
return Float.valueOf( toExternal(f) );
}
}

View File

@ -539,6 +539,7 @@ public final class IndexSchema {
destArr = (SchemaField[])append(destArr,d);
}
copyFields.put(source,destArr);
copyFieldTarget.add( d );
}
}
@ -898,6 +899,7 @@ public final class IndexSchema {
private final Map<String, SchemaField[]> copyFields = new HashMap<String,SchemaField[]>();
private DynamicCopy[] dynamicCopyFields;
private final Set<SchemaField> copyFieldTarget = new HashSet<SchemaField>();
/**
* Get all copy fields, both the static and the dynamic ones.
@ -932,6 +934,16 @@ public final class IndexSchema {
return results;
}
/**
* Check if a field is used as the destination of a copyField operation
*
* @since solr 1.3
*/
public boolean isCopyFieldTarget( SchemaField f )
{
return copyFieldTarget.contains( f );
}
/**
* Is the given field name a wildcard? I.e. does it begin or end with *?
* @param name
@ -948,3 +960,4 @@ public final class IndexSchema {

View File

@ -50,4 +50,9 @@ public class IntField extends FieldType {
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeInt(name, f.stringValue());
}
@Override
public Integer toObject(Fieldable f) {
return Integer.valueOf( toExternal(f) );
}
}

View File

@ -55,4 +55,9 @@ public class LongField extends FieldType {
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeLong(name, f.stringValue());
}
@Override
public Long toObject(Fieldable f) {
return Long.valueOf( toExternal(f) );
}
}

View File

@ -54,6 +54,11 @@ public class SortableDoubleField extends FieldType {
return indexedToReadable(f.stringValue());
}
@Override
public Double toObject(Fieldable f) {
return Double.valueOf( toExternal(f) );
}
public String indexedToReadable(String indexedForm) {
return NumberUtils.SortableStr2doubleStr(indexedForm);
}
@ -137,3 +142,6 @@ class SortableDoubleFieldSource extends FieldCacheSource {
}

View File

@ -54,6 +54,11 @@ public class SortableFloatField extends FieldType {
return indexedToReadable(f.stringValue());
}
@Override
public Float toObject(Fieldable f) {
return Float.valueOf( toExternal(f) );
}
public String indexedToReadable(String indexedForm) {
return NumberUtils.SortableStr2floatStr(indexedForm);
}
@ -134,3 +139,6 @@ class SortableFloatFieldSource extends FieldCacheSource {
};
}

View File

@ -61,6 +61,11 @@ public class SortableIntField extends FieldType {
return NumberUtils.SortableStr2int(indexedForm);
}
@Override
public Integer toObject(Fieldable f) {
return Integer.valueOf( toExternal(f) );
}
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
String sval = f.stringValue();
// since writeInt an int instead of a String since that may be more efficient

View File

@ -58,6 +58,11 @@ public class SortableLongField extends FieldType {
return indexedToReadable(f.stringValue());
}
@Override
public Long toObject(Fieldable f) {
return Long.valueOf( toExternal(f) );
}
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
String sval = f.stringValue();
xmlWriter.writeLong(name, NumberUtils.SortableStr2long(sval,0,sval.length()));

View File

@ -18,12 +18,17 @@
package org.apache.solr.update;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.DateField;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
@ -63,6 +68,12 @@ public class DocumentBuilder {
+ ": first='" + oldValue + "' second='" + val + "'");
}
}
if( doc == null ) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
"must call startDoc() before adding fields!" );
}
// field.setBoost(boost);
doc.add(field);
}
@ -140,4 +151,65 @@ public class DocumentBuilder {
Document ret = doc; doc=null;
return ret;
}
/**
* Build a lucene document from a SolrInputDocument
*
* @since solr 1.3
*/
public Document build( SolrInputDocument doc )
{
this.startDoc();
for( String name : doc.getFieldNames() ) {
Float boost = doc.getBoost( name );
if( boost == null ) {
boost = new Float( 1 );
}
for( Object v : doc.getFieldValues( name ) ) {
if( v instanceof Date ) {
// Make sure to format dates
SchemaField sfield = schema.getField(name);
if( sfield.getType() instanceof DateField ) {
DateField df = (DateField)sfield.getType();
this.addField( name, df.toInternal( (Date)v )+'Z', boost );
continue;
}
}
this.addField( name, v==null ? null : v.toString(), boost );
}
}
// set the full document boost
Document luceneDoc = this.getDoc();
if( doc.getBoost( null ) != null ) {
luceneDoc.setBoost( doc.getBoost( null ) );
}
return luceneDoc;
}
/**
* Add fields from the solr document
*
* TODO: /!\ NOTE /!\ This semantics of this function are still in flux.
* Something somewhere needs to be able to fill up a SolrDocument from
* a lucene document - this is one place that may happen. It may also be
* moved to an independent function
*
* @since solr 1.3
*/
public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc )
{
for( Object f : luceneDoc.getFields() ) {
Fieldable field = (Fieldable)f;
if( field.isStored() ) {
SchemaField sf = schema.getField( field.name() );
if( !schema.isCopyFieldTarget( sf ) ) {
doc.addField( field.name(), sf.getType().toObject( field ) );
}
}
}
return doc;
}
}

View File

@ -121,7 +121,56 @@ public class SolrDocumentTest extends TestCase
doc.addField( "v", c0 );
assertEquals( arr.length, doc.getFieldValues("v").size() );
}
public void testOrderedDistinctFields()
{
List<String> c0 = new ArrayList<String>();
c0.add( "aaa" );
c0.add( "bbb" );
c0.add( "aaa" );
c0.add( "aaa" );
c0.add( "ccc" );
SolrInputDocument doc = new SolrInputDocument();
doc.setKeepDuplicateFieldValues( null, false );
doc.addField( "v", c0 );
assertEquals( 3, doc.getFieldValues("v").size() );
assertEquals( "[aaa, bbb, ccc]", doc.getFieldValues( "v" ).toString() );
}
public void testDuplicate()
{
Float fval0 = new Float( 10.01f );
Float fval1 = new Float( 11.01f );
Float fval2 = new Float( 12.01f );
// Set up a simple document
SolrInputDocument doc = new SolrInputDocument();
for( int i=0; i<5; i++ ) {
doc.addField( "f", fval0 );
doc.addField( "f", fval1 );
doc.addField( "f", fval2 );
}
assertEquals( (3*5), doc.getFieldValues("f").size() );
try {
doc.setKeepDuplicateFieldValues( "f", false );
fail( "can't change distinct for an existing field" );
}
catch( Exception ex ) {}
doc.removeFields( "f" );
doc.setKeepDuplicateFieldValues( "f", false );
for( int i=0; i<5; i++ ) {
doc.addField( "f", fval0 );
doc.addField( "f", fval1 );
doc.addField( "f", fval2 );
}
assertEquals( (3), doc.getFieldValues("f").size() );
}
}