mirror of https://github.com/apache/lucene.git
SOLR-262 -- Added toObject( Fieldable ) to FieldType. This lets the DocumentBuilder convert lucene Document to a SolrDocument. This patch also lets the DocumentBuilder convert a SolrInputDocument to a lucene Document.
The semantics of document conversion is not totally agreed upon yet. This is commited so we have something to work with for other patches. git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@547493 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ecdbcc62d5
commit
04e7c94b99
|
@ -30,7 +30,7 @@ Detailed Change List
|
|||
|
||||
New Features
|
||||
1. SOLR-69: Adding MoreLikeThisHandler to search for similar documents using
|
||||
lucene contrib/queries MoreLikeThis. MoreLikeThis is also avaliable from
|
||||
lucene contrib/queries MoreLikeThis. MoreLikeThis is also available from
|
||||
the StandardRequestHandler using ?mlt=true. (bdelacretaz, ryan)
|
||||
|
||||
2. SOLR-253: Adding KeepWordFilter and KeepWordFilterFactory. A TokenFilter
|
||||
|
@ -54,6 +54,10 @@ New Features
|
|||
also includes tests that start jetty and test a connection using the full
|
||||
HTTP request cycle. (Darren Erik Vengroff, Will Johnson, ryan)
|
||||
|
||||
7. SOLR-262: Added toObject( Fieldable ) to FieldType. This lets the
|
||||
DocumentBuilder convert lucene Document to a SolrDocument. This patch
|
||||
also lets the DocumentBuilder convert a SolrInputDocument to a lucene
|
||||
Document. (ryan)
|
||||
|
||||
Changes in runtime behavior
|
||||
|
||||
|
|
|
@ -17,7 +17,10 @@
|
|||
|
||||
package org.apache.solr.common;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
|
@ -25,6 +28,9 @@ import java.util.Map;
|
|||
* a Lucene Document. Like the SolrDocument, the field values need to
|
||||
* match those specified in schema.xml
|
||||
*
|
||||
* By default, this will keep every field value added to the document. To only
|
||||
* keep distinct values, use setKeepDuplicateFieldValues( "fieldname", false);
|
||||
*
|
||||
* @author ryan
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
|
@ -32,6 +38,22 @@ import java.util.Map;
|
|||
public class SolrInputDocument extends SolrDocument
|
||||
{
|
||||
private Map<String,Float> _boost = null;
|
||||
private Map<String,Boolean> _keepDuplicates = null;
|
||||
|
||||
/**
|
||||
* Return a base collection to manage the fields for a given value. If
|
||||
* the field is defined to be "distinct", the field will be backed as
|
||||
* a Set rather then a List. Adding the same value multiple times will
|
||||
* only keep a single instance of that value.
|
||||
*/
|
||||
@Override
|
||||
protected Collection<Object> getEmptyCollection( String name )
|
||||
{
|
||||
if( _keepDuplicates == null || Boolean.TRUE == _keepDuplicates.get( name )) {
|
||||
return new ArrayList<Object>();
|
||||
}
|
||||
return new LinkedHashSet<Object>(); // keep the order? -- perhaps HashSet?
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove all fields and boosts from the document
|
||||
|
@ -43,6 +65,9 @@ public class SolrInputDocument extends SolrDocument
|
|||
if( _boost != null ) {
|
||||
_boost.clear();
|
||||
}
|
||||
if(_keepDuplicates != null ) {
|
||||
_keepDuplicates.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -88,4 +113,34 @@ public class SolrInputDocument extends SolrDocument
|
|||
}
|
||||
return _boost.get( name );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Should the Document be able to contain duplicate values for the same field?
|
||||
*
|
||||
* By default, all field values are maintained. If you only want to distinct values
|
||||
* set setKeepDuplicateFieldValues( "fieldname", false );
|
||||
*
|
||||
* To change the default behavior, use <code>null</code> as the fieldname.
|
||||
*
|
||||
* NOTE: this must be called before adding any values to the given field.
|
||||
*/
|
||||
public void setKeepDuplicateFieldValues( String name, boolean v )
|
||||
{
|
||||
if( this.getFieldValues( name ) != null ) {
|
||||
// If it was not distinct and changed to distinct, we could, but this seems like a better rule
|
||||
throw new RuntimeException( "You can't change a fields distinctness after it is initialized." );
|
||||
}
|
||||
|
||||
if( _keepDuplicates == null ) {
|
||||
if( v == true ) {
|
||||
// we only care about 'false' we don't need to make a map unless
|
||||
// something does not want multiple values
|
||||
return;
|
||||
}
|
||||
_keepDuplicates = new HashMap<String, Boolean>();
|
||||
}
|
||||
_keepDuplicates.put( name, v );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -43,12 +43,19 @@ public class BCDIntField extends FieldType {
|
|||
}
|
||||
|
||||
public String toInternal(String val) {
|
||||
// TODO? make sure each character is a digit?
|
||||
return BCDUtils.base10toBase10kSortableInt(val);
|
||||
}
|
||||
|
||||
public String toExternal(Fieldable f) {
|
||||
return indexedToReadable(f.stringValue());
|
||||
}
|
||||
|
||||
// Note, this can't return type 'Integer' because BCDStrField and BCDLong extend it
|
||||
@Override
|
||||
public Object toObject(Fieldable f) {
|
||||
return Integer.valueOf( toExternal(f) );
|
||||
}
|
||||
|
||||
public String indexedToReadable(String indexedForm) {
|
||||
return BCDUtils.base10kSortableIntToBase10(indexedForm);
|
||||
|
@ -64,3 +71,6 @@ public class BCDIntField extends FieldType {
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -29,4 +29,9 @@ public class BCDLongField extends BCDIntField {
|
|||
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
|
||||
xmlWriter.writeLong(name,toExternal(f));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long toObject(Fieldable f) {
|
||||
return Long.valueOf( toExternal(f) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,4 +29,13 @@ public class BCDStrField extends BCDIntField {
|
|||
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
|
||||
xmlWriter.writeStr(name,toExternal(f));
|
||||
}
|
||||
|
||||
/**
|
||||
* This only works for strings that represent an interger. If the string
|
||||
* is not an integer, it will not survive the base10k conversion!
|
||||
*/
|
||||
@Override
|
||||
public String toObject(Fieldable f) {
|
||||
return toExternal(f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,6 +89,11 @@ public class BoolField extends FieldType {
|
|||
return indexedToReadable(f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean toObject(Fieldable f) {
|
||||
return Boolean.valueOf( toExternal(f) );
|
||||
}
|
||||
|
||||
public String indexedToReadable(String indexedForm) {
|
||||
char ch = indexedForm.charAt(0);
|
||||
return ch=='T' ? "true" : "false";
|
||||
|
|
|
@ -120,6 +120,16 @@ public class DateField extends FieldType {
|
|||
return indexedToReadable(f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Date toObject(Fieldable f) {
|
||||
try {
|
||||
return getThreadLocalDateFormat().parse( toExternal(f) );
|
||||
}
|
||||
catch( ParseException ex ) {
|
||||
throw new RuntimeException( ex );
|
||||
}
|
||||
}
|
||||
|
||||
public SortField getSortField(SchemaField field,boolean reverse) {
|
||||
return getStringSort(field,reverse);
|
||||
}
|
||||
|
|
|
@ -53,4 +53,10 @@ public class DoubleField extends FieldType {
|
|||
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
|
||||
writer.writeDouble(name, f.stringValue());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Double toObject(Fieldable f) {
|
||||
return Double.valueOf( toExternal(f) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -235,6 +235,15 @@ public abstract class FieldType extends FieldProperties {
|
|||
return f.stringValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the stored-field format to an external object.
|
||||
* @see #toInternal
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public Object toObject(Fieldable f) {
|
||||
return toExternal(f); // by default use the string
|
||||
}
|
||||
|
||||
/** :TODO: document this method */
|
||||
public String indexedToReadable(String indexedForm) {
|
||||
return indexedForm;
|
||||
|
|
|
@ -50,4 +50,9 @@ public class FloatField extends FieldType {
|
|||
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
|
||||
writer.writeFloat(name, f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Float toObject(Fieldable f) {
|
||||
return Float.valueOf( toExternal(f) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -539,6 +539,7 @@ public final class IndexSchema {
|
|||
destArr = (SchemaField[])append(destArr,d);
|
||||
}
|
||||
copyFields.put(source,destArr);
|
||||
copyFieldTarget.add( d );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -898,6 +899,7 @@ public final class IndexSchema {
|
|||
|
||||
private final Map<String, SchemaField[]> copyFields = new HashMap<String,SchemaField[]>();
|
||||
private DynamicCopy[] dynamicCopyFields;
|
||||
private final Set<SchemaField> copyFieldTarget = new HashSet<SchemaField>();
|
||||
|
||||
/**
|
||||
* Get all copy fields, both the static and the dynamic ones.
|
||||
|
@ -931,6 +933,16 @@ public final class IndexSchema {
|
|||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a field is used as the destination of a copyField operation
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public boolean isCopyFieldTarget( SchemaField f )
|
||||
{
|
||||
return copyFieldTarget.contains( f );
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the given field name a wildcard? I.e. does it begin or end with *?
|
||||
|
@ -948,3 +960,4 @@ public final class IndexSchema {
|
|||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -50,4 +50,9 @@ public class IntField extends FieldType {
|
|||
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
|
||||
writer.writeInt(name, f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer toObject(Fieldable f) {
|
||||
return Integer.valueOf( toExternal(f) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,4 +55,9 @@ public class LongField extends FieldType {
|
|||
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
|
||||
writer.writeLong(name, f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long toObject(Fieldable f) {
|
||||
return Long.valueOf( toExternal(f) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,6 +54,11 @@ public class SortableDoubleField extends FieldType {
|
|||
return indexedToReadable(f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Double toObject(Fieldable f) {
|
||||
return Double.valueOf( toExternal(f) );
|
||||
}
|
||||
|
||||
public String indexedToReadable(String indexedForm) {
|
||||
return NumberUtils.SortableStr2doubleStr(indexedForm);
|
||||
}
|
||||
|
@ -137,3 +142,6 @@ class SortableDoubleFieldSource extends FieldCacheSource {
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -54,6 +54,11 @@ public class SortableFloatField extends FieldType {
|
|||
return indexedToReadable(f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Float toObject(Fieldable f) {
|
||||
return Float.valueOf( toExternal(f) );
|
||||
}
|
||||
|
||||
public String indexedToReadable(String indexedForm) {
|
||||
return NumberUtils.SortableStr2floatStr(indexedForm);
|
||||
}
|
||||
|
@ -134,3 +139,6 @@ class SortableFloatFieldSource extends FieldCacheSource {
|
|||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -61,6 +61,11 @@ public class SortableIntField extends FieldType {
|
|||
return NumberUtils.SortableStr2int(indexedForm);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer toObject(Fieldable f) {
|
||||
return Integer.valueOf( toExternal(f) );
|
||||
}
|
||||
|
||||
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
|
||||
String sval = f.stringValue();
|
||||
// since writeInt an int instead of a String since that may be more efficient
|
||||
|
|
|
@ -58,6 +58,11 @@ public class SortableLongField extends FieldType {
|
|||
return indexedToReadable(f.stringValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long toObject(Fieldable f) {
|
||||
return Long.valueOf( toExternal(f) );
|
||||
}
|
||||
|
||||
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
|
||||
String sval = f.stringValue();
|
||||
xmlWriter.writeLong(name, NumberUtils.SortableStr2long(sval,0,sval.length()));
|
||||
|
|
|
@ -18,12 +18,17 @@
|
|||
package org.apache.solr.update;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.schema.DateField;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
||||
|
@ -63,6 +68,12 @@ public class DocumentBuilder {
|
|||
+ ": first='" + oldValue + "' second='" + val + "'");
|
||||
}
|
||||
}
|
||||
|
||||
if( doc == null ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"must call startDoc() before adding fields!" );
|
||||
}
|
||||
|
||||
// field.setBoost(boost);
|
||||
doc.add(field);
|
||||
}
|
||||
|
@ -140,4 +151,65 @@ public class DocumentBuilder {
|
|||
Document ret = doc; doc=null;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a lucene document from a SolrInputDocument
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public Document build( SolrInputDocument doc )
|
||||
{
|
||||
this.startDoc();
|
||||
|
||||
for( String name : doc.getFieldNames() ) {
|
||||
Float boost = doc.getBoost( name );
|
||||
if( boost == null ) {
|
||||
boost = new Float( 1 );
|
||||
}
|
||||
|
||||
for( Object v : doc.getFieldValues( name ) ) {
|
||||
if( v instanceof Date ) {
|
||||
// Make sure to format dates
|
||||
SchemaField sfield = schema.getField(name);
|
||||
if( sfield.getType() instanceof DateField ) {
|
||||
DateField df = (DateField)sfield.getType();
|
||||
this.addField( name, df.toInternal( (Date)v )+'Z', boost );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
this.addField( name, v==null ? null : v.toString(), boost );
|
||||
}
|
||||
}
|
||||
|
||||
// set the full document boost
|
||||
Document luceneDoc = this.getDoc();
|
||||
if( doc.getBoost( null ) != null ) {
|
||||
luceneDoc.setBoost( doc.getBoost( null ) );
|
||||
}
|
||||
return luceneDoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add fields from the solr document
|
||||
*
|
||||
* TODO: /!\ NOTE /!\ This semantics of this function are still in flux.
|
||||
* Something somewhere needs to be able to fill up a SolrDocument from
|
||||
* a lucene document - this is one place that may happen. It may also be
|
||||
* moved to an independent function
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc )
|
||||
{
|
||||
for( Object f : luceneDoc.getFields() ) {
|
||||
Fieldable field = (Fieldable)f;
|
||||
if( field.isStored() ) {
|
||||
SchemaField sf = schema.getField( field.name() );
|
||||
if( !schema.isCopyFieldTarget( sf ) ) {
|
||||
doc.addField( field.name(), sf.getType().toObject( field ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -121,7 +121,56 @@ public class SolrDocumentTest extends TestCase
|
|||
doc.addField( "v", c0 );
|
||||
assertEquals( arr.length, doc.getFieldValues("v").size() );
|
||||
}
|
||||
|
||||
public void testOrderedDistinctFields()
|
||||
{
|
||||
List<String> c0 = new ArrayList<String>();
|
||||
c0.add( "aaa" );
|
||||
c0.add( "bbb" );
|
||||
c0.add( "aaa" );
|
||||
c0.add( "aaa" );
|
||||
c0.add( "ccc" );
|
||||
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setKeepDuplicateFieldValues( null, false );
|
||||
doc.addField( "v", c0 );
|
||||
assertEquals( 3, doc.getFieldValues("v").size() );
|
||||
|
||||
assertEquals( "[aaa, bbb, ccc]", doc.getFieldValues( "v" ).toString() );
|
||||
}
|
||||
|
||||
public void testDuplicate()
|
||||
{
|
||||
Float fval0 = new Float( 10.01f );
|
||||
Float fval1 = new Float( 11.01f );
|
||||
Float fval2 = new Float( 12.01f );
|
||||
|
||||
// Set up a simple document
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
for( int i=0; i<5; i++ ) {
|
||||
doc.addField( "f", fval0 );
|
||||
doc.addField( "f", fval1 );
|
||||
doc.addField( "f", fval2 );
|
||||
}
|
||||
assertEquals( (3*5), doc.getFieldValues("f").size() );
|
||||
|
||||
try {
|
||||
doc.setKeepDuplicateFieldValues( "f", false );
|
||||
fail( "can't change distinct for an existing field" );
|
||||
}
|
||||
catch( Exception ex ) {}
|
||||
|
||||
doc.removeFields( "f" );
|
||||
doc.setKeepDuplicateFieldValues( "f", false );
|
||||
for( int i=0; i<5; i++ ) {
|
||||
doc.addField( "f", fval0 );
|
||||
doc.addField( "f", fval1 );
|
||||
doc.addField( "f", fval2 );
|
||||
}
|
||||
assertEquals( (3), doc.getFieldValues("f").size() );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue