SOLR-2685: always use SolrInputDocument in update chain, change from String to BytesRef

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1152500 13f79535-47bb-0310-9956-ffa450edef68
2011-07-30 19:18:09 +00:00 · 2011-07-30 19:18:09 +00:00 · 135d89c4bd
parent 13347bd3bc
commit 135d89c4bd
12 changed files with 82 additions and 133 deletions
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
@ -119,7 +119,7 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
  }
  void addDoc(SolrContentHandler handler) throws IOException {
-    templateAdd.indexedId = null;
+    templateAdd.clear();
    doAdd(handler, templateAdd);
  }
--- a/solr/core/src/java/org/apache/solr/handler/CSVRequestHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/CSVRequestHandler.java
@ -411,7 +411,7 @@ class SingleThreadedCSVLoader extends CSVLoader {
  @Override
  void addDoc(int line, String[] vals) throws IOException {
-    templateAdd.indexedId = null;
+    templateAdd.clear();
    SolrInputDocument doc = new SolrInputDocument();
    doAdd(line, vals, doc, templateAdd);
  }
--- a/solr/core/src/java/org/apache/solr/handler/XMLLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/XMLLoader.java
@ -211,8 +211,7 @@ class XMLLoader extends ContentStreamLoader {
                    "unexpected XML tag /delete/" + currTag);
          }
          processor.processDelete(deleteCmd);
-          deleteCmd.id = null;
+          deleteCmd.clear();
          deleteCmd.query = null;
          break;
          // Add everything to the text
--- a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
+++ b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
@ -20,6 +20,8 @@ package org.apache.solr.update;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
 import org.apache.solr.request.SolrQueryRequest;
@ -32,10 +34,7 @@ import org.apache.solr.schema.SchemaField;
 public class AddUpdateCommand extends UpdateCommand {
   // optional id in "internal" indexed form... if it is needed and not supplied,
   // it will be obtained from the doc.
-   public String indexedId;
+   private BytesRef indexedId;
   // The Lucene document to be indexed
   public Document doc;
   // Higher level SolrInputDocument, normally used to construct the Lucene Document
   // to index.
@ -52,7 +51,6 @@ public class AddUpdateCommand extends UpdateCommand {
   /**
    * Resets the per-document state of this command (both document forms and the
    * cached indexed id) so the object can be reused for another document in the
    * same request.
    */
   public void clear() {
     indexedId = null;
     solrDoc = null;
     doc = null;
   }
@ -61,26 +59,32 @@ public class AddUpdateCommand extends UpdateCommand {
     return solrDoc;
   }
-   public Document getLuceneDocument(IndexSchema schema) {
+  /** Creates and returns a lucene Document to index.  Any changes made to the returned Document
-     if (doc == null && solrDoc != null) {
+   * will not be reflected in the SolrInputDocument, or future calls to this method.
-       // TODO??  build the doc from the SolrDocument?
+   */
-     }
+   public Document getLuceneDocument() {
-     return doc;    
+     return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema());
   }
-   public String getIndexedId(IndexSchema schema) {
+  /** Returns the indexed ID for this document.  The returned BytesRef is retained across multiple calls, and should not be modified. */
   public BytesRef getIndexedId() {
     if (indexedId == null) {
       IndexSchema schema = req.getSchema();
       SchemaField sf = schema.getUniqueKeyField();
       if (sf != null) {
         if (doc != null) {
           schema.getUniqueKeyField();
           Fieldable storedId = doc.getFieldable(sf.getName());
           indexedId = sf.getType().storedToIndexed(storedId);
         }
         if (solrDoc != null) {
           SolrInputField field = solrDoc.getField(sf.getName());
-           if (field != null) {
+
-             indexedId = sf.getType().toInternal( field.getFirstValue().toString() );
+           int count = field==null ? 0 : field.getValueCount();
           if (count == 0) {
             if (overwrite) {
               throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document is missing mandatory uniqueKey field: " + sf.getName());
             }
           } else if (count  > 1) {
             throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document contains multiple values for uniqueKey field: " + field);
           } else {
             indexedId = new BytesRef();
             sf.getType().readableToIndexed(field.getFirstValue().toString(), indexedId);
           }
         }
       }
@ -88,16 +92,9 @@ public class AddUpdateCommand extends UpdateCommand {
     return indexedId;
   }
-   public String getPrintableId(IndexSchema schema) {
+   public String getPrintableId() {
     IndexSchema schema = req.getSchema();
     SchemaField sf = schema.getUniqueKeyField();
     if (indexedId != null && sf != null) {
       return sf.getType().indexedToReadable(indexedId);
     }
     if (doc != null) {
       return schema.printableUniqueKey(doc);
     }
     if (solrDoc != null && sf != null) {
       SolrInputField field = solrDoc.getField(sf.getName());
       if (field != null) {
--- a/solr/core/src/java/org/apache/solr/update/DeleteUpdateCommand.java
+++ b/solr/core/src/java/org/apache/solr/update/DeleteUpdateCommand.java
@ -17,7 +17,11 @@
 package org.apache.solr.update;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.SolrInputField;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 /**
 *
@ -25,11 +29,33 @@ import org.apache.solr.request.SolrQueryRequest;
 public class DeleteUpdateCommand extends UpdateCommand {
  public String id;    // external (printable) id, for delete-by-id
  public String query; // query string for delete-by-query
  private BytesRef indexedId;
  /**
   * Creates a delete command bound to the given request; the command name
   * "delete" and the request are passed on to the superclass constructor.
   */
  public DeleteUpdateCommand(SolrQueryRequest req) {
    super("delete", req);
  }
  /** Sets the id, the query and the cached indexed id back to null. */
  public void clear() {
    indexedId = null;
    query = null;
    id = null;
  }
  /**
   * Returns the indexed ID for this delete, converting the readable {@code id} on
   * first use and caching the result. The returned BytesRef is retained across
   * multiple calls, and should not be modified.
   *
   * @return the indexed id, or null when the schema has no unique key field or
   *         no id has been set on this command
   */
  public BytesRef getIndexedId() {
    if (indexedId != null) {
      return indexedId;
    }
    SchemaField sf = req.getSchema().getUniqueKeyField();
    if (sf == null || id == null) {
      return null;
    }
    // Fill a local first and publish it only after the conversion succeeded, so a
    // conversion that throws cannot leave an empty BytesRef cached as the id.
    BytesRef converted = new BytesRef();
    sf.getType().readableToIndexed(id, converted);
    indexedId = converted;
    return indexedId;
  }
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder(commandName);
--- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
+++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@ -154,10 +154,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
 			Term updateTerm = null;
      if (cmd.overwrite) {
-        if (cmd.indexedId == null) {
+        Term idTerm = new Term(idField.getName(), cmd.getIndexedId());
          cmd.indexedId = getIndexedId(cmd.doc);
        }
        Term idTerm = new Term(idField.getName(), cmd.indexedId);
        boolean del = false;
        if (cmd.updateTerm == null) {
          updateTerm = idTerm;
@ -166,7 +163,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
          updateTerm = cmd.updateTerm;
        }
-        writer.updateDocument(updateTerm, cmd.getLuceneDocument(schema));
+        writer.updateDocument(updateTerm, cmd.getLuceneDocument());
        if(del) { // ensure id remains unique
          BooleanQuery bq = new BooleanQuery();
          bq.add(new BooleanClause(new TermQuery(updateTerm), Occur.MUST_NOT));
@ -175,7 +172,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
        }
      } else {
        // allow duplicates
-        writer.addDocument(cmd.getLuceneDocument(schema));
+        writer.addDocument(cmd.getLuceneDocument());
      }
      rc = 1;
@ -198,7 +195,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
    deleteByIdCommands.incrementAndGet();
    deleteByIdCommandsCumulative.incrementAndGet();
-    indexWriterProvider.getIndexWriter().deleteDocuments(new Term(idField.getName(), idFieldType.toInternal(cmd.id)));
+    indexWriterProvider.getIndexWriter().deleteDocuments(new Term(idField.getName(), cmd.getIndexedId()));
    if (commitTracker.timeUpperBound > 0) {
      commitTracker.scheduleCommitWithin(commitTracker.timeUpperBound);
--- a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
@ -111,34 +111,6 @@ public abstract class UpdateHandler implements SolrInfoMBean {
    idFieldType = idField!=null ? idField.getType() : null;
    parseEventListeners();
  }
  /** Builds the Term on the unique key field for an id given in readable form. */
  protected final Term idTerm(String readableId) {
    // the readable id is converted to the field type's internal form before it
    // is used as the term text
    String keyFieldName = idField.getName();
    return new Term(keyFieldName, idFieldType.toInternal(readableId));
  }
  /**
   * Returns the indexed form of the unique key value held by the given document.
   *
   * @throws SolrException with BAD_REQUEST when the schema has no unique key field,
   *         or the document has no value or more than one value for that field
   */
  protected final String getIndexedId(Document doc) {
    if (idField == null) {
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Operation requires schema to have a unique key field");
    }

    // Right now, single valued fields that require value transformation from external to internal (indexed)
    // form have that transformation already performed and stored as the field value.
    String keyName = idField.getName();
    Fieldable[] idValues = doc.getFieldables( keyName );
    int valueCount = (idValues == null) ? 0 : idValues.length;
    if (valueCount < 1) {
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document is missing mandatory uniqueKey field: " + keyName);
    }
    if (valueCount > 1) {
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document contains multiple values for uniqueKey field: " + keyName);
    }
    return idFieldType.storedToIndexed( idValues[0] );
  }
  /**
   * Returns the indexed form of the document's unique key value, or null when the
   * schema has no unique key field or the document carries no value for it.
   */
  protected final String getIndexedIdOptional(Document doc) {
    if (idField != null) {
      Fieldable idValue = doc.getFieldable(idField.getName());
      if (idValue != null) {
        return idFieldType.storedToIndexed(idValue);
      }
    }
    return null;
  }
  /**
   * Allows the UpdateHandler to create the SolrIndexSearcher after it
@ -167,44 +139,6 @@ public abstract class UpdateHandler implements SolrInfoMBean {
  public abstract void close() throws IOException;
  /**
   * Collector that deletes every collected document through the searcher's index
   * reader and counts how many deletions succeeded.
   */
  static class DeleteHitCollector extends Collector {
    public final SolrIndexSearcher searcher;
    public int deleted=0;
    // docBase of the reader context most recently passed to setNextReader
    private int segmentBase;

    public DeleteHitCollector(SolrIndexSearcher searcher) {
      this.searcher = searcher;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      // the scorer is not used
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
      segmentBase = context.docBase;
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return false;
    }

    @Override
    public void collect(int doc) {
      final int target = segmentBase + doc;
      try {
        searcher.getIndexReader().deleteDocument(target);
      } catch (IOException e) {
        // the searcher is deliberately not closed on failure for now
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Error deleting doc# "+doc,e,false);
      }
      // only reached when the delete did not throw
      deleted += 1;
    }
  }
  /**
   * NOTE: this function is not thread safe.  However, it is safe to call within the
   * <code>inform( SolrCore core )</code> function for <code>SolrCoreAware</code> classes.
--- a/solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@ -106,9 +106,9 @@ class LogUpdateProcessor extends UpdateRequestProcessor {
    }
    if (adds.size() < maxNumToLog) {
-      adds.add(cmd.getPrintableId(req.getSchema()));
+      adds.add(cmd.getPrintableId());
    }
-    if (logDebug) { log.debug("add {}", cmd.getPrintableId(req.getSchema())); }
+    if (logDebug) { log.debug("add {}", cmd.getPrintableId()); }
    numAdds++;
--- a/solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@ -57,7 +57,6 @@ class RunUpdateProcessor extends UpdateRequestProcessor
  /**
   * Passes the add command to the update handler and then delegates to
   * {@code super.processAdd(cmd)}.
   *
   * @param cmd the add command carrying the SolrInputDocument to index
   * @throws IOException propagated from the update handler or the superclass call
   */
  @Override
  public void processAdd(AddUpdateCommand cmd) throws IOException {
    // No Lucene Document is pre-built here: the update handler obtains it through
    // AddUpdateCommand.getLuceneDocument(), which converts the SolrInputDocument
    // itself, so filling cmd.doc first would convert every document twice.
    updateHandler.addDoc(cmd);
    super.processAdd(cmd);
  }
--- a/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java
+++ b/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java
@ -294,7 +294,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
    final boolean tombstones = false;
    // query variables
-    final AtomicLong operations = new AtomicLong(10000000);  // number of query operations to perform in total       // TODO: temporarily high due to lack of stability
+    final AtomicLong operations = new AtomicLong(0);  // number of query operations to perform in total       // TODO: temporarily high due to lack of stability
    int nReadThreads = 10;
    initModel(ndocs);
--- a/solr/core/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java
+++ b/solr/core/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java
@ -18,6 +18,7 @@ package org.apache.solr.update;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.util.AbstractSolrTestCase;
@ -55,9 +56,9 @@ public class DirectUpdateHandlerOptimizeTest extends AbstractSolrTestCase {
    //the merge factor is 100 and the maxBufferedDocs is 2, so there should be 50 segments
    for (int i = 0; i < 99; i++) {
      // Add a valid document
-      cmd.doc = new Document();
+      cmd.solrDoc = new SolrInputDocument();
-      cmd.doc.add(new Field("id", "id_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED));
+      cmd.solrDoc.addField("id", "id_" + i);
-      cmd.doc.add(new Field("subject", "subject_" + i, Field.Store.NO, Field.Index.ANALYZED));
+      cmd.solrDoc.addField("subject", "subject_" + i);
      updater.addDoc(cmd);
    }
--- a/solr/core/src/test/org/apache/solr/update/TestIndexingPerformance.java
+++ b/solr/core/src/test/org/apache/solr/update/TestIndexingPerformance.java
@ -20,6 +20,7 @@ package org.apache.solr.update;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.util.AbstractSolrTestCase;
@ -50,7 +51,6 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
    int iter=1000;
    String iterS = System.getProperty("iter");
    if (iterS != null) iter=Integer.parseInt(iterS);
    boolean includeDoc = Boolean.parseBoolean(System.getProperty("includeDoc","true")); // include the time to create the document
    boolean overwrite = Boolean.parseBoolean(System.getProperty("overwrite","false"));
    String doc = System.getProperty("doc");
    if (doc != null) {
@ -61,14 +61,15 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
    SolrQueryRequest req = lrf.makeRequest();
    IndexSchema schema = req.getSchema();
    UpdateHandler updateHandler = req.getCore().getUpdateHandler();
    String field = "textgap";
-    String[] fields = {"text","simple"
+    String[] fields = {field,"simple"
-            ,"text","test"
+            ,field,"test"
-            ,"text","how now brown cow"
+            ,field,"how now brown cow"
-            ,"text","what's that?"
+            ,field,"what's that?"
-            ,"text","radical!"
+            ,field,"radical!"
-            ,"text","what's all this about, anyway?"
+            ,field,"what's all this about, anyway?"
-            ,"text","just how fast is this text indexing?"
+            ,field,"just how fast is this text indexing?"
    };
@ -91,26 +92,21 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
    long start = System.currentTimeMillis();
    AddUpdateCommand add = new AddUpdateCommand(req);
-
+    add.overwrite = overwrite;
    Field idField=null;
    for (int i=0; i<iter; i++) {
-      if (includeDoc || add.doc==null) {
+      add.clear();
-        add.doc = new Document();
+      add.solrDoc = new SolrInputDocument();
-        idField = new Field("id","", Field.Store.YES, Field.Index.NOT_ANALYZED);
+      add.solrDoc.addField("id", Integer.toString(i));
-        add.doc.add(idField);
+      for (int j=0; j<fields.length; j+=2) {
-        for (int j=0; j<fields.length; j+=2) {
+        String f = fields[j];
-          String field = fields[j];
+        String val = fields[j+1];
-          String val = fields[j+1];
+        add.solrDoc.addField(f, val);
          Fieldable f = schema.getField(field).createField(val, 1.0f);
          add.doc.add(f);
        }
      }
      idField.setValue(Integer.toString(i));
      updateHandler.addDoc(add);
    }
    long end = System.currentTimeMillis();
-    log.info("includeDoc="+includeDoc+" doc="+ Arrays.toString(fields));
+    log.info("doc="+ Arrays.toString(fields));
    log.info("iter="+iter +" time=" + (end-start) + " throughput=" + ((long)iter*1000)/(end-start));
    //discard all the changes