SOLR-2685: always use SolrInputDocument in update chain, change from String to BytesRef

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1152500 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2011-07-30 19:18:09 +00:00
parent 13347bd3bc
commit 135d89c4bd
12 changed files with 82 additions and 133 deletions

View File

@ -119,7 +119,7 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
} }
void addDoc(SolrContentHandler handler) throws IOException { void addDoc(SolrContentHandler handler) throws IOException {
templateAdd.indexedId = null; templateAdd.clear();
doAdd(handler, templateAdd); doAdd(handler, templateAdd);
} }

View File

@ -411,7 +411,7 @@ class SingleThreadedCSVLoader extends CSVLoader {
@Override @Override
void addDoc(int line, String[] vals) throws IOException { void addDoc(int line, String[] vals) throws IOException {
templateAdd.indexedId = null; templateAdd.clear();
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doAdd(line, vals, doc, templateAdd); doAdd(line, vals, doc, templateAdd);
} }

View File

@ -211,8 +211,7 @@ class XMLLoader extends ContentStreamLoader {
"unexpected XML tag /delete/" + currTag); "unexpected XML tag /delete/" + currTag);
} }
processor.processDelete(deleteCmd); processor.processDelete(deleteCmd);
deleteCmd.id = null; deleteCmd.clear();
deleteCmd.query = null;
break; break;
// Add everything to the text // Add everything to the text

View File

@ -20,6 +20,8 @@ package org.apache.solr.update;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField; import org.apache.solr.common.SolrInputField;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
@ -32,10 +34,7 @@ import org.apache.solr.schema.SchemaField;
public class AddUpdateCommand extends UpdateCommand { public class AddUpdateCommand extends UpdateCommand {
// optional id in "internal" indexed form... if it is needed and not supplied, // optional id in "internal" indexed form... if it is needed and not supplied,
// it will be obtained from the doc. // it will be obtained from the doc.
public String indexedId; private BytesRef indexedId;
// The Lucene document to be indexed
public Document doc;
// Higher level SolrInputDocument, normally used to construct the Lucene Document // Higher level SolrInputDocument, normally used to construct the Lucene Document
// to index. // to index.
@ -52,7 +51,6 @@ public class AddUpdateCommand extends UpdateCommand {
/** Reset state to reuse this object with a different document in the same request */ /** Reset state to reuse this object with a different document in the same request */
public void clear() { public void clear() {
doc = null;
solrDoc = null; solrDoc = null;
indexedId = null; indexedId = null;
} }
@ -61,26 +59,32 @@ public class AddUpdateCommand extends UpdateCommand {
return solrDoc; return solrDoc;
} }
public Document getLuceneDocument(IndexSchema schema) { /** Creates and returns a lucene Document to index. Any changes made to the returned Document
if (doc == null && solrDoc != null) { * will not be reflected in the SolrInputDocument, or future calls to this method.
// TODO?? build the doc from the SolrDocument? */
} public Document getLuceneDocument() {
return doc; return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema());
} }
public String getIndexedId(IndexSchema schema) { /** Returns the indexed ID for this document. The returned BytesRef is retained across multiple calls, and should not be modified. */
public BytesRef getIndexedId() {
if (indexedId == null) { if (indexedId == null) {
IndexSchema schema = req.getSchema();
SchemaField sf = schema.getUniqueKeyField(); SchemaField sf = schema.getUniqueKeyField();
if (sf != null) { if (sf != null) {
if (doc != null) {
schema.getUniqueKeyField();
Fieldable storedId = doc.getFieldable(sf.getName());
indexedId = sf.getType().storedToIndexed(storedId);
}
if (solrDoc != null) { if (solrDoc != null) {
SolrInputField field = solrDoc.getField(sf.getName()); SolrInputField field = solrDoc.getField(sf.getName());
if (field != null) {
indexedId = sf.getType().toInternal( field.getFirstValue().toString() ); int count = field==null ? 0 : field.getValueCount();
if (count == 0) {
if (overwrite) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document is missing mandatory uniqueKey field: " + sf.getName());
}
} else if (count > 1) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document contains multiple values for uniqueKey field: " + field);
} else {
indexedId = new BytesRef();
sf.getType().readableToIndexed(field.getFirstValue().toString(), indexedId);
} }
} }
} }
@ -88,16 +92,9 @@ public class AddUpdateCommand extends UpdateCommand {
return indexedId; return indexedId;
} }
public String getPrintableId(IndexSchema schema) { public String getPrintableId() {
IndexSchema schema = req.getSchema();
SchemaField sf = schema.getUniqueKeyField(); SchemaField sf = schema.getUniqueKeyField();
if (indexedId != null && sf != null) {
return sf.getType().indexedToReadable(indexedId);
}
if (doc != null) {
return schema.printableUniqueKey(doc);
}
if (solrDoc != null && sf != null) { if (solrDoc != null && sf != null) {
SolrInputField field = solrDoc.getField(sf.getName()); SolrInputField field = solrDoc.getField(sf.getName());
if (field != null) { if (field != null) {

View File

@ -17,7 +17,11 @@
package org.apache.solr.update; package org.apache.solr.update;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
/** /**
* *
@ -25,11 +29,33 @@ import org.apache.solr.request.SolrQueryRequest;
public class DeleteUpdateCommand extends UpdateCommand { public class DeleteUpdateCommand extends UpdateCommand {
public String id; // external (printable) id, for delete-by-id public String id; // external (printable) id, for delete-by-id
public String query; // query string for delete-by-query public String query; // query string for delete-by-query
private BytesRef indexedId;
public DeleteUpdateCommand(SolrQueryRequest req) { public DeleteUpdateCommand(SolrQueryRequest req) {
super("delete", req); super("delete", req);
} }
public void clear() {
id = null;
query = null;
indexedId = null;
}
/** Returns the indexed ID for this delete. The returned BytesRef is retained across multiple calls, and should not be modified. */
public BytesRef getIndexedId() {
if (indexedId == null) {
IndexSchema schema = req.getSchema();
SchemaField sf = schema.getUniqueKeyField();
if (sf != null && id != null) {
indexedId = new BytesRef();
sf.getType().readableToIndexed(id, indexedId);
}
}
return indexedId;
}
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(commandName); StringBuilder sb = new StringBuilder(commandName);

View File

@ -154,10 +154,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
Term updateTerm = null; Term updateTerm = null;
if (cmd.overwrite) { if (cmd.overwrite) {
if (cmd.indexedId == null) { Term idTerm = new Term(idField.getName(), cmd.getIndexedId());
cmd.indexedId = getIndexedId(cmd.doc);
}
Term idTerm = new Term(idField.getName(), cmd.indexedId);
boolean del = false; boolean del = false;
if (cmd.updateTerm == null) { if (cmd.updateTerm == null) {
updateTerm = idTerm; updateTerm = idTerm;
@ -166,7 +163,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
updateTerm = cmd.updateTerm; updateTerm = cmd.updateTerm;
} }
writer.updateDocument(updateTerm, cmd.getLuceneDocument(schema)); writer.updateDocument(updateTerm, cmd.getLuceneDocument());
if(del) { // ensure id remains unique if(del) { // ensure id remains unique
BooleanQuery bq = new BooleanQuery(); BooleanQuery bq = new BooleanQuery();
bq.add(new BooleanClause(new TermQuery(updateTerm), Occur.MUST_NOT)); bq.add(new BooleanClause(new TermQuery(updateTerm), Occur.MUST_NOT));
@ -175,7 +172,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
} }
} else { } else {
// allow duplicates // allow duplicates
writer.addDocument(cmd.getLuceneDocument(schema)); writer.addDocument(cmd.getLuceneDocument());
} }
rc = 1; rc = 1;
@ -198,7 +195,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
deleteByIdCommands.incrementAndGet(); deleteByIdCommands.incrementAndGet();
deleteByIdCommandsCumulative.incrementAndGet(); deleteByIdCommandsCumulative.incrementAndGet();
indexWriterProvider.getIndexWriter().deleteDocuments(new Term(idField.getName(), idFieldType.toInternal(cmd.id))); indexWriterProvider.getIndexWriter().deleteDocuments(new Term(idField.getName(), cmd.getIndexedId()));
if (commitTracker.timeUpperBound > 0) { if (commitTracker.timeUpperBound > 0) {
commitTracker.scheduleCommitWithin(commitTracker.timeUpperBound); commitTracker.scheduleCommitWithin(commitTracker.timeUpperBound);

View File

@ -112,34 +112,6 @@ public abstract class UpdateHandler implements SolrInfoMBean {
parseEventListeners(); parseEventListeners();
} }
protected final Term idTerm(String readableId) {
// to correctly create the Term, the string needs to be run
// through the Analyzer for that field.
return new Term(idField.getName(), idFieldType.toInternal(readableId));
}
protected final String getIndexedId(Document doc) {
if (idField == null)
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Operation requires schema to have a unique key field");
// Right now, single valued fields that require value transformation from external to internal (indexed)
// form have that transformation already performed and stored as the field value.
Fieldable[] id = doc.getFieldables( idField.getName() );
if (id == null || id.length < 1)
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document is missing mandatory uniqueKey field: " + idField.getName());
if( id.length > 1 )
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document contains multiple values for uniqueKey field: " + idField.getName());
return idFieldType.storedToIndexed( id[0] );
}
protected final String getIndexedIdOptional(Document doc) {
if (idField == null) return null;
Fieldable f = doc.getFieldable(idField.getName());
if (f == null) return null;
return idFieldType.storedToIndexed(f);
}
/** /**
* Allows the UpdateHandler to create the SolrIndexSearcher after it * Allows the UpdateHandler to create the SolrIndexSearcher after it
* has issued a 'softCommit'. * has issued a 'softCommit'.
@ -167,44 +139,6 @@ public abstract class UpdateHandler implements SolrInfoMBean {
public abstract void close() throws IOException; public abstract void close() throws IOException;
static class DeleteHitCollector extends Collector {
public int deleted=0;
public final SolrIndexSearcher searcher;
private int docBase;
public DeleteHitCollector(SolrIndexSearcher searcher) {
this.searcher = searcher;
}
@Override
public void collect(int doc) {
try {
searcher.getIndexReader().deleteDocument(doc + docBase);
deleted++;
} catch (IOException e) {
// don't try to close the searcher on failure for now...
// try { closeSearcher(); } catch (Exception ee) { SolrException.log(log,ee); }
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Error deleting doc# "+doc,e,false);
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
}
}
/** /**
* NOTE: this function is not thread safe. However, it is safe to call within the * NOTE: this function is not thread safe. However, it is safe to call within the
* <code>inform( SolrCore core )</code> function for <code>SolrCoreAware</code> classes. * <code>inform( SolrCore core )</code> function for <code>SolrCoreAware</code> classes.

View File

@ -106,9 +106,9 @@ class LogUpdateProcessor extends UpdateRequestProcessor {
} }
if (adds.size() < maxNumToLog) { if (adds.size() < maxNumToLog) {
adds.add(cmd.getPrintableId(req.getSchema())); adds.add(cmd.getPrintableId());
} }
if (logDebug) { log.debug("add {}", cmd.getPrintableId(req.getSchema())); } if (logDebug) { log.debug("add {}", cmd.getPrintableId()); }
numAdds++; numAdds++;

View File

@ -57,7 +57,6 @@ class RunUpdateProcessor extends UpdateRequestProcessor
@Override @Override
public void processAdd(AddUpdateCommand cmd) throws IOException { public void processAdd(AddUpdateCommand cmd) throws IOException {
cmd.doc = DocumentBuilder.toDocument(cmd.getSolrInputDocument(), req.getSchema());
updateHandler.addDoc(cmd); updateHandler.addDoc(cmd);
super.processAdd(cmd); super.processAdd(cmd);
} }

View File

@ -294,7 +294,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
final boolean tombstones = false; final boolean tombstones = false;
// query variables // query variables
final AtomicLong operations = new AtomicLong(10000000); // number of query operations to perform in total // TODO: temporarily high due to lack of stability final AtomicLong operations = new AtomicLong(0); // number of query operations to perform in total // TODO: temporarily high due to lack of stability
int nReadThreads = 10; int nReadThreads = 10;
initModel(ndocs); initModel(ndocs);

View File

@ -18,6 +18,7 @@ package org.apache.solr.update;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
@ -55,9 +56,9 @@ public class DirectUpdateHandlerOptimizeTest extends AbstractSolrTestCase {
//the merge factor is 100 and the maxBufferedDocs is 2, so there should be 50 segments //the merge factor is 100 and the maxBufferedDocs is 2, so there should be 50 segments
for (int i = 0; i < 99; i++) { for (int i = 0; i < 99; i++) {
// Add a valid document // Add a valid document
cmd.doc = new Document(); cmd.solrDoc = new SolrInputDocument();
cmd.doc.add(new Field("id", "id_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED)); cmd.solrDoc.addField("id", "id_" + i);
cmd.doc.add(new Field("subject", "subject_" + i, Field.Store.NO, Field.Index.ANALYZED)); cmd.solrDoc.addField("subject", "subject_" + i);
updater.addDoc(cmd); updater.addDoc(cmd);
} }

View File

@ -20,6 +20,7 @@ package org.apache.solr.update;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
@ -50,7 +51,6 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
int iter=1000; int iter=1000;
String iterS = System.getProperty("iter"); String iterS = System.getProperty("iter");
if (iterS != null) iter=Integer.parseInt(iterS); if (iterS != null) iter=Integer.parseInt(iterS);
boolean includeDoc = Boolean.parseBoolean(System.getProperty("includeDoc","true")); // include the time to create the document
boolean overwrite = Boolean.parseBoolean(System.getProperty("overwrite","false")); boolean overwrite = Boolean.parseBoolean(System.getProperty("overwrite","false"));
String doc = System.getProperty("doc"); String doc = System.getProperty("doc");
if (doc != null) { if (doc != null) {
@ -61,14 +61,15 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
SolrQueryRequest req = lrf.makeRequest(); SolrQueryRequest req = lrf.makeRequest();
IndexSchema schema = req.getSchema(); IndexSchema schema = req.getSchema();
UpdateHandler updateHandler = req.getCore().getUpdateHandler(); UpdateHandler updateHandler = req.getCore().getUpdateHandler();
String field = "textgap";
String[] fields = {"text","simple" String[] fields = {field,"simple"
,"text","test" ,field,"test"
,"text","how now brown cow" ,field,"how now brown cow"
,"text","what's that?" ,field,"what's that?"
,"text","radical!" ,field,"radical!"
,"text","what's all this about, anyway?" ,field,"what's all this about, anyway?"
,"text","just how fast is this text indexing?" ,field,"just how fast is this text indexing?"
}; };
@ -91,26 +92,21 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
AddUpdateCommand add = new AddUpdateCommand(req); AddUpdateCommand add = new AddUpdateCommand(req);
add.overwrite = overwrite;
Field idField=null;
for (int i=0; i<iter; i++) { for (int i=0; i<iter; i++) {
if (includeDoc || add.doc==null) { add.clear();
add.doc = new Document(); add.solrDoc = new SolrInputDocument();
idField = new Field("id","", Field.Store.YES, Field.Index.NOT_ANALYZED); add.solrDoc.addField("id", Integer.toString(i));
add.doc.add(idField);
for (int j=0; j<fields.length; j+=2) { for (int j=0; j<fields.length; j+=2) {
String field = fields[j]; String f = fields[j];
String val = fields[j+1]; String val = fields[j+1];
Fieldable f = schema.getField(field).createField(val, 1.0f); add.solrDoc.addField(f, val);
add.doc.add(f);
} }
}
idField.setValue(Integer.toString(i));
updateHandler.addDoc(add); updateHandler.addDoc(add);
} }
long end = System.currentTimeMillis(); long end = System.currentTimeMillis();
log.info("includeDoc="+includeDoc+" doc="+ Arrays.toString(fields)); log.info("doc="+ Arrays.toString(fields));
log.info("iter="+iter +" time=" + (end-start) + " throughput=" + ((long)iter*1000)/(end-start)); log.info("iter="+iter +" time=" + (end-start) + " throughput=" + ((long)iter*1000)/(end-start));
//discard all the changes //discard all the changes