SOLR-193: Adding SolrDocument and SolrInputDocument to represent documents outside of the lucene Document infrastructure.

This commit includes two functions not in the last (reviewed) patch: public Map<String,Collection<Object>> getFieldValuesMap() public Map<String,Object> getFieldValueMap() If there are concerns, we can remove these functions. git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@546979 13f79535-47bb-0310-9956-ffa450edef68
2007-06-13 17:57:40 +00:00 · 2007-06-13 17:57:40 +00:00 · c6c0e141bf
parent 669e43d7af
commit c6c0e141bf
4 changed files with 419 additions and 0 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -40,6 +40,10 @@ New Features
 3. SOLR-257: WordDelimiterFilter has a new parameter splitOnCaseChange,
    which can be set to 0 to disable splitting "PowerShot" => "Power" "Shot".
    (klaas)
+    
+ 3. SOLR-193: Adding SolrDocument and SolrInputDocument to represent documents
+    outside of the lucene Document infrastructure.  This class will be used 
+    by clients and for processing documents. (ryan) 

 Changes in runtime behavior

--- a/src/java/org/apache/solr/util/SolrDocument.java
+++ b/src/java/org/apache/solr/util/SolrDocument.java
@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+
+/**
+ * A concrete representation of a document within a Solr index.  Unlike a lucene
+ * Document, a SolrDocument may have an Object value matching the type defined in
+ * schema.xml
+ * 
+ * For indexing documents, use the SolrInputDocumet that contains extra information
+ * for document and field boosting.
+ * 
+ * @author ryan
+ * @version $Id$
+ * @since solr 1.3
+ */
+public class SolrDocument 
+{
+  private Map<String,Collection<Object>> _fields = null;
+  
+  public SolrDocument()
+  {
+    _fields = new HashMap<String,Collection<Object>>();
+  }
+
+  /**
+   * Let sub classes return something other then a List.  
+   * Perhaps a Set or LinkedHashSet
+   */
+  protected Collection<Object> getEmptyCollection( String name )
+  {
+    return new ArrayList<Object>();
+  }
+
+  /**
+   * @return a list of fields defined in this document
+   */
+  public Collection<String> getFieldNames() {
+    return _fields.keySet();
+  }
+
+  ///////////////////////////////////////////////////////////////////
+  // Add / Set / Remove Fields
+  ///////////////////////////////////////////////////////////////////
+
+  /**
+   * Remove all fields with the name
+   */
+  public boolean removeFields(String name) 
+  {
+    return _fields.remove( name ) != null;
+  }
+
+  /**
+   * Set a field with the given object.  If the object is an Array or Iterable, it will 
+   * set multiple fields with the included contents.  This will replace any existing 
+   * field with the given name
+   */
+  public void setField(String name, Object value) 
+  {
+    Collection<Object> existing = _fields.get( name );
+    if( existing != null ) {
+      existing.clear();
+    }
+    this.addField(name, value);
+  }
+
+  /**
+   * This will add a field to the document.  If fields already exist with this name
+   * it will append the collection
+   */
+  public void addField(String name, Object value) 
+  { 
+    Collection<Object> existing = _fields.get( name );
+    if( existing == null ) {
+      existing = getEmptyCollection(name);
+      _fields.put( name, existing );
+    }
+    
+    // Arrays are iterable?  They appear to be, but not in the docs...
+    if( value instanceof Iterable ) {
+      for( Object o : (Iterable)value ) {
+        this.addField( name, o );  
+      }
+    }
+    else if( value instanceof Object[] ) {
+      for( Object o : (Object[])value ) {
+        this.addField( name, o );  
+      }
+    }
+    else {
+      existing.add( value );
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////
+  // Get the field values
+  ///////////////////////////////////////////////////////////////////
+
+  /**
+   * returns the first value for this field
+   */
+  public Object getFieldValue(String name) {
+    Collection v = _fields.get( name );
+    if( v != null && v.size() > 0 ) {
+      return v.iterator().next();
+    }
+    return null;
+  }
+
+  /**
+   * Get a collection or all the values for a given field name
+   */
+  public Collection<Object> getFieldValues(String name) {
+    return _fields.get( name );
+  }
+  
+// TODO? should this be in the API?
+//  /**
+//   * Return a named list version
+//   */
+//  public NamedList<Object> toNamedList()
+//  {
+//    NamedList<Object> nl = new NamedList<Object>();
+//    for( Map.Entry<String, Collection<Object>> entry : _fields.entrySet() ) {
+//      Collection<Object> v = entry.getValue();
+//      if( v.size() == 0 ) {
+//        nl.add( entry.getKey(), null );
+//      }
+//      else if( v.size() > 1 ) {
+//        nl.add( entry.getKey(), v );
+//      }
+//      else { // Add a single value
+//        nl.add( entry.getKey(), v.iterator().next() );
+//      }
+//    }
+//    return nl;
+//  }
+  
+  @Override
+  public String toString()
+  {
+    return "SolrDocument["+getFieldNames()+"]";
+  }
+  
+  /**
+   * Expose a Map interface to the solr field value collection.
+   */
+  public Map<String,Collection<Object>> getFieldValuesMap()
+  {
+    return _fields;
+  }
+
+  /**
+   * Expose a Map interface to the solr fields.  This function is useful for JSTL
+   */
+  public Map<String,Object> getFieldValueMap() {
+    return new Map<String,Object>() {
+      /** Get the field Value */
+      public Object get(Object key) { 
+        return getFieldValue( (String)key ); 
+      }
+      
+      /** Set the field Value */
+      public Object put(String key, Object value) {
+        setField( key, value );
+        return null;
+      }
+
+      /** Remove the field Value */
+      public Object remove(Object key) {
+        removeFields( (String)key ); 
+        return null;
+      }
+      
+      // Easily Supported methods
+      public boolean containsKey(Object key) { return _fields.containsKey( key ); }
+      public Set<String>  keySet()           { return _fields.keySet();  }
+      public int          size()             { return _fields.size();    }
+      public boolean      isEmpty()          { return _fields.isEmpty(); }
+
+      // Unsupported operations.  These are not necessary for JSTL
+      public void clear() { throw new UnsupportedOperationException(); }
+      public boolean containsValue(Object value) {throw new UnsupportedOperationException();}
+      public Set<java.util.Map.Entry<String, Object>> entrySet() {throw new UnsupportedOperationException();}
+      public void putAll(Map<? extends String, ? extends Object> t) {throw new UnsupportedOperationException();}
+      public Collection<Object> values() {throw new UnsupportedOperationException();}
+    };
+  }
+}
--- a/src/java/org/apache/solr/util/SolrInputDocument.java
+++ b/src/java/org/apache/solr/util/SolrInputDocument.java
@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Represent the field and boost information needed to construct and index
+ * a Lucene Document.  Like the SolrDocument, the field values need to
+ * match those specified in schema.xml 
+ * 
+ * @author ryan
+ * @version $Id$
+ * @since solr 1.3
+ */
+public class SolrInputDocument extends SolrDocument
+{
+  private Map<String,Float> _boost = null;
+  
+  /**
+   * Set the document boost.  null will remove the boost
+   */
+  public void setDocumentBoost( Float v )
+  {
+    this.setBoost( null, v );
+  }
+  
+  /**
+   * @return the document boost.  or null if not set
+   */
+  public Float getDocumentBoost()
+  {
+    return this.getBoost( null );
+  }
+  
+  /**
+   * Get the lucene document boost for a field.  Passing in <code>null</code> returns the
+   * document boost, not a field boost.  
+   */
+  public void setBoost(String name, Float boost) {
+    if( _boost == null ) {
+      _boost = new HashMap<String, Float>();
+    }
+    if( boost == null ) {
+      _boost.remove( name );
+    }
+    else {
+      _boost.put( name, boost );
+    }
+  }
+
+  /**
+   * Set the field boost.  All fields with the name will have the same boost.  
+   * Passing in <code>null</code> sets the document boost.
+   * @param boost
+   */
+  public Float getBoost(String name) {
+    if( _boost == null ) {
+      return null;
+    }
+    return _boost.get( name );
+  }
+}
--- a/src/test/org/apache/solr/util/SolrDocumentTest.java
+++ b/src/test/org/apache/solr/util/SolrDocumentTest.java
@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+/**
+ * @author ryan
+ */
+public class SolrDocumentTest extends TestCase 
+{
+  public void testSimple() 
+  {
+    Float fval = new Float( 10.01f );
+    Boolean bval = Boolean.TRUE;
+    String sval = "12qwaszx";
+    
+    // Set up a simple document
+    SolrDocument doc = new SolrDocument();
+    doc.addField( "f", fval );
+    doc.addField( "b", bval );
+    doc.addField( "s", sval );
+    doc.addField( "f", 100 ); // again, but something else
+
+    // make sure we can pull values out of it
+    assertEquals( fval, doc.getFieldValue( "f" ) );
+    assertEquals( bval, doc.getFieldValue( "b" ) );
+    assertEquals( sval, doc.getFieldValue( "s" ) );
+    assertEquals( 2, doc.getFieldValues( "f" ).size() );
+    assertNull( doc.getFieldValue( "xxxxx" ) );
+    assertNull( doc.getFieldValues( "xxxxx" ) );
+    
+    List<String> keys = new ArrayList<String>();
+    for( String s : doc.getFieldNames() ) {
+      keys.add( s );
+    }
+    Collections.sort( keys );
+    assertEquals( 3, keys.size() );
+    assertEquals( "[b, f, s]", keys.toString() );
+    
+    // set field replaced existing values:
+    doc.setField( "f", fval );
+    assertEquals( 1, doc.getFieldValues( "f" ).size() );
+    assertEquals( fval, doc.getFieldValue( "f" ) );
+    
+    doc.setField( "n", null );
+    assertEquals( null, doc.getFieldValue( "n" ) );
+    
+    // now remove some fields
+    assertEquals( true, doc.removeFields( "f" ) );
+    assertEquals( false, doc.removeFields( "asdgsadgas" ) );
+    assertNull( doc.getFieldValue( "f" ) );
+    assertNull( doc.getFieldValues( "f" ) );
+  }
+  
+  public void testDocumentBoosts()
+  {
+    SolrInputDocument doc = new SolrInputDocument();
+    assertEquals( null, doc.getBoost( "aaa" ) );
+    doc.setBoost( "aaa", 10.0f );
+    assertEquals( 10.0f, doc.getBoost( "aaa" ) );
+    doc.setBoost( "aaa", null );
+    assertEquals( null, doc.getBoost( "aaa" ) );
+  }
+  
+  public void testUnsupportedStuff()
+  {
+    SolrDocument doc = new SolrDocument();
+
+    try { doc.getFieldValueMap().clear();               fail( "should be unsupported!" ); } catch( Exception ex ){}
+    try { doc.getFieldValueMap().containsValue( null ); fail( "should be unsupported!" ); } catch( Exception ex ){}
+    try { doc.getFieldValueMap().entrySet();            fail( "should be unsupported!" ); } catch( Exception ex ){}
+    try { doc.getFieldValueMap().putAll( null );        fail( "should be unsupported!" ); } catch( Exception ex ){}
+    try { doc.getFieldValueMap().values();              fail( "should be unsupported!" ); } catch( Exception ex ){}
+
+    assertEquals( null, doc.getFieldValueMap().get( "aaa" ) );
+    doc.setField( "aaa", "bbb" );
+    assertEquals( "bbb", doc.getFieldValueMap().get( "aaa" ) );
+    doc.getFieldValueMap().remove( "aaa" );
+    assertEquals( null, doc.getFieldValueMap().get( "aaa" ) );
+  }
+  
+  public void testAddCollections()
+  {
+    List<String> c0 = new ArrayList<String>();
+    c0.add( "aaa" );
+    c0.add( "aaa" );
+    c0.add( "aaa" );
+    c0.add( "bbb" );
+    c0.add( "ccc" );
+    
+    SolrDocument doc = new SolrDocument();
+    doc.addField( "v", c0 );
+    assertEquals( c0.size(), doc.getFieldValues("v").size() );
+    
+    // Same thing with an array
+    Object[] arr = new Object[] { "aaa", "aaa", "aaa", 10, 'b' };
+    doc = new SolrDocument();
+    doc.addField( "v", c0 );
+    assertEquals( arr.length, doc.getFieldValues("v").size() );
+  }
+}
+
+
+