mirror of https://github.com/apache/lucene.git
SOLR-2685: always use SolrInputDocument in update chain, change from String to BytesRef
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1152500 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
13347bd3bc
commit
135d89c4bd
|
@ -119,7 +119,7 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
|
|||
}
|
||||
|
||||
/**
 * Resets the reusable {@code templateAdd} command and passes it, together with the handler, to {@code doAdd}.
 *
 * @param handler content handler forwarded unchanged to {@code doAdd}
 * @throws IOException declared for the delegated add
 */
void addDoc(SolrContentHandler handler) throws IOException {
|
||||
// NOTE(review): this listing is a diff rendered without +/- markers; the direct indexedId reset below and the
// clear() call after it look like the before/after forms of the same statement (indexedId is declared private
// in AddUpdateCommand elsewhere in this change) - confirm against the commit.
templateAdd.indexedId = null;
|
||||
templateAdd.clear();
|
||||
doAdd(handler, templateAdd);
|
||||
}
|
||||
|
||||
|
|
|
@ -411,7 +411,7 @@ class SingleThreadedCSVLoader extends CSVLoader {
|
|||
|
||||
/**
 * Resets the reusable {@code templateAdd} command, creates a fresh SolrInputDocument and passes the line number,
 * values, document and command to {@code doAdd}.
 *
 * @param line line number forwarded to {@code doAdd}
 * @param vals values forwarded to {@code doAdd}
 * @throws IOException declared for the delegated add
 */
@Override
|
||||
void addDoc(int line, String[] vals) throws IOException {
|
||||
// NOTE(review): this listing is a diff rendered without +/- markers; the direct indexedId reset below and the
// clear() call after it look like the before/after forms of the same statement - confirm against the commit.
templateAdd.indexedId = null;
|
||||
templateAdd.clear();
|
||||
// a new document per call; only the command object is reused
SolrInputDocument doc = new SolrInputDocument();
|
||||
doAdd(line, vals, doc, templateAdd);
|
||||
}
|
||||
|
|
|
@ -211,8 +211,7 @@ class XMLLoader extends ContentStreamLoader {
|
|||
"unexpected XML tag /delete/" + currTag);
|
||||
}
|
||||
processor.processDelete(deleteCmd);
|
||||
deleteCmd.id = null;
|
||||
deleteCmd.query = null;
|
||||
deleteCmd.clear();
|
||||
break;
|
||||
|
||||
// Add everything to the text
|
||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.solr.update;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
@ -32,10 +34,7 @@ import org.apache.solr.schema.SchemaField;
|
|||
public class AddUpdateCommand extends UpdateCommand {
|
||||
// optional id in "internal" indexed form... if it is needed and not supplied,
|
||||
// it will be obtained from the doc.
|
||||
public String indexedId;
|
||||
|
||||
// The Lucene document to be indexed
|
||||
public Document doc;
|
||||
private BytesRef indexedId;
|
||||
|
||||
// Higher level SolrInputDocument, normally used to construct the Lucene Document
|
||||
// to index.
|
||||
|
@ -52,7 +51,6 @@ public class AddUpdateCommand extends UpdateCommand {
|
|||
|
||||
/** Reset state to reuse this object with a different document in the same request */
|
||||
public void clear() {
|
||||
// NOTE(review): this listing is a diff rendered without +/- markers; the hunk header reports one line fewer after
// the change, presumably this doc reset (the doc field declaration also appears among the changed lines) - confirm.
doc = null;
|
||||
solrDoc = null;
|
||||
// getIndexedId() only computes the id while this is null, so clearing it forces a recomputation for the next document
indexedId = null;
|
||||
}
|
||||
|
@ -61,26 +59,32 @@ public class AddUpdateCommand extends UpdateCommand {
|
|||
return solrDoc;
|
||||
}
|
||||
|
||||
public Document getLuceneDocument(IndexSchema schema) {
|
||||
if (doc == null && solrDoc != null) {
|
||||
// TODO?? build the doc from the SolrDocument?
|
||||
}
|
||||
return doc;
|
||||
/** Creates and returns a lucene Document to index. Any changes made to the returned Document
|
||||
* will not be reflected in the SolrInputDocument, or future calls to this method.
|
||||
*/
|
||||
public Document getLuceneDocument() {
|
||||
return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema());
|
||||
}
|
||||
|
||||
public String getIndexedId(IndexSchema schema) {
|
||||
/** Returns the indexed ID for this document. The returned BytesRef is retained across multiple calls, and should not be modified. */
|
||||
public BytesRef getIndexedId() {
|
||||
if (indexedId == null) {
|
||||
IndexSchema schema = req.getSchema();
|
||||
SchemaField sf = schema.getUniqueKeyField();
|
||||
if (sf != null) {
|
||||
if (doc != null) {
|
||||
schema.getUniqueKeyField();
|
||||
Fieldable storedId = doc.getFieldable(sf.getName());
|
||||
indexedId = sf.getType().storedToIndexed(storedId);
|
||||
}
|
||||
if (solrDoc != null) {
|
||||
SolrInputField field = solrDoc.getField(sf.getName());
|
||||
if (field != null) {
|
||||
indexedId = sf.getType().toInternal( field.getFirstValue().toString() );
|
||||
|
||||
int count = field==null ? 0 : field.getValueCount();
|
||||
if (count == 0) {
|
||||
if (overwrite) {
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document is missing mandatory uniqueKey field: " + sf.getName());
|
||||
}
|
||||
} else if (count > 1) {
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document contains multiple values for uniqueKey field: " + field);
|
||||
} else {
|
||||
indexedId = new BytesRef();
|
||||
sf.getType().readableToIndexed(field.getFirstValue().toString(), indexedId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -88,16 +92,9 @@ public class AddUpdateCommand extends UpdateCommand {
|
|||
return indexedId;
|
||||
}
|
||||
|
||||
public String getPrintableId(IndexSchema schema) {
|
||||
public String getPrintableId() {
|
||||
IndexSchema schema = req.getSchema();
|
||||
SchemaField sf = schema.getUniqueKeyField();
|
||||
if (indexedId != null && sf != null) {
|
||||
return sf.getType().indexedToReadable(indexedId);
|
||||
}
|
||||
|
||||
if (doc != null) {
|
||||
return schema.printableUniqueKey(doc);
|
||||
}
|
||||
|
||||
if (solrDoc != null && sf != null) {
|
||||
SolrInputField field = solrDoc.getField(sf.getName());
|
||||
if (field != null) {
|
||||
|
|
|
@ -17,7 +17,11 @@
|
|||
|
||||
package org.apache.solr.update;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -25,11 +29,33 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
public class DeleteUpdateCommand extends UpdateCommand {
|
||||
public String id; // external (printable) id, for delete-by-id
|
||||
public String query; // query string for delete-by-query
|
||||
private BytesRef indexedId;
|
||||
|
||||
|
||||
/**
 * Creates a delete command for the given request, passing the command name "delete" to the superclass constructor.
 *
 * @param req request forwarded to the superclass constructor
 */
public DeleteUpdateCommand(SolrQueryRequest req) {
|
||||
super("delete", req);
|
||||
}
|
||||
|
||||
/** Sets id, query and the cached indexedId back to null. */
public void clear() {
|
||||
id = null;
|
||||
query = null;
|
||||
// getIndexedId() only converts the id while this is null, so clearing it discards the cached conversion
indexedId = null;
|
||||
}
|
||||
|
||||
/**
 * Returns the indexed ID for this delete. The returned BytesRef is retained across multiple calls, and should not be modified.
 * The value is computed on the first call from the external {@code id} through the unique key field's type and then
 * kept in {@code indexedId}.
 *
 * @return the indexed form of {@code id}, or null when the schema has no unique key field or {@code id} is null
 */
|
||||
public BytesRef getIndexedId() {
|
||||
// compute only when nothing is cached yet
if (indexedId == null) {
|
||||
IndexSchema schema = req.getSchema();
|
||||
SchemaField sf = schema.getUniqueKeyField();
|
||||
// without a unique key field or an id there is nothing to convert; indexedId stays null
if (sf != null && id != null) {
|
||||
indexedId = new BytesRef();
|
||||
// the field type writes the indexed form of the readable id into the new BytesRef
sf.getType().readableToIndexed(id, indexedId);
|
||||
}
|
||||
}
|
||||
return indexedId;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder(commandName);
|
||||
|
|
|
@ -154,10 +154,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
|
|||
Term updateTerm = null;
|
||||
|
||||
if (cmd.overwrite) {
|
||||
if (cmd.indexedId == null) {
|
||||
cmd.indexedId = getIndexedId(cmd.doc);
|
||||
}
|
||||
Term idTerm = new Term(idField.getName(), cmd.indexedId);
|
||||
Term idTerm = new Term(idField.getName(), cmd.getIndexedId());
|
||||
boolean del = false;
|
||||
if (cmd.updateTerm == null) {
|
||||
updateTerm = idTerm;
|
||||
|
@ -166,7 +163,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
|
|||
updateTerm = cmd.updateTerm;
|
||||
}
|
||||
|
||||
writer.updateDocument(updateTerm, cmd.getLuceneDocument(schema));
|
||||
writer.updateDocument(updateTerm, cmd.getLuceneDocument());
|
||||
if(del) { // ensure id remains unique
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
bq.add(new BooleanClause(new TermQuery(updateTerm), Occur.MUST_NOT));
|
||||
|
@ -175,7 +172,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
|
|||
}
|
||||
} else {
|
||||
// allow duplicates
|
||||
writer.addDocument(cmd.getLuceneDocument(schema));
|
||||
writer.addDocument(cmd.getLuceneDocument());
|
||||
}
|
||||
|
||||
rc = 1;
|
||||
|
@ -198,7 +195,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
|
|||
deleteByIdCommands.incrementAndGet();
|
||||
deleteByIdCommandsCumulative.incrementAndGet();
|
||||
|
||||
indexWriterProvider.getIndexWriter().deleteDocuments(new Term(idField.getName(), idFieldType.toInternal(cmd.id)));
|
||||
indexWriterProvider.getIndexWriter().deleteDocuments(new Term(idField.getName(), cmd.getIndexedId()));
|
||||
|
||||
if (commitTracker.timeUpperBound > 0) {
|
||||
commitTracker.scheduleCommitWithin(commitTracker.timeUpperBound);
|
||||
|
|
|
@ -112,34 +112,6 @@ public abstract class UpdateHandler implements SolrInfoMBean {
|
|||
parseEventListeners();
|
||||
}
|
||||
|
||||
/**
 * Builds the Term for the unique key field from a readable id.
 *
 * @param readableId id in readable form; converted with {@code idFieldType.toInternal}
 * @return a Term on {@code idField.getName()} holding the converted id
 */
protected final Term idTerm(String readableId) {
|
||||
// to correctly create the Term, the string needs to be run
|
||||
// through the Analyzer for that field.
|
||||
return new Term(idField.getName(), idFieldType.toInternal(readableId));
|
||||
}
|
||||
|
||||
/**
 * Returns the indexed form of the unique key value stored in the given Lucene document.
 *
 * @param doc document to read the unique key field from
 * @return the single unique key value converted with {@code idFieldType.storedToIndexed}
 * @throws SolrException with BAD_REQUEST when there is no unique key field, or when the document holds no value
 *         or more than one value for it
 */
protected final String getIndexedId(Document doc) {
|
||||
if (idField == null)
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Operation requires schema to have a unique key field");
|
||||
|
||||
// Right now, single valued fields that require value transformation from external to internal (indexed)
|
||||
// form have that transformation already performed and stored as the field value.
|
||||
Fieldable[] id = doc.getFieldables( idField.getName() );
|
||||
// exactly one value is required: reject a missing id first, then a multi-valued one
if (id == null || id.length < 1)
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document is missing mandatory uniqueKey field: " + idField.getName());
|
||||
if( id.length > 1 )
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Document contains multiple values for uniqueKey field: " + idField.getName());
|
||||
|
||||
return idFieldType.storedToIndexed( id[0] );
|
||||
}
|
||||
|
||||
/**
 * Returns the indexed form of the document's unique key value, or null instead of throwing when it is unavailable.
 *
 * @param doc document to read the unique key field from
 * @return the value converted with {@code idFieldType.storedToIndexed}, or null when there is no unique key field
 *         or the document has no value for it
 */
protected final String getIndexedIdOptional(Document doc) {
|
||||
if (idField == null) return null;
|
||||
Fieldable f = doc.getFieldable(idField.getName());
|
||||
if (f == null) return null;
|
||||
return idFieldType.storedToIndexed(f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows the UpdateHandler to create the SolrIndexSearcher after it
|
||||
* has issued a 'softCommit'.
|
||||
|
@ -167,44 +139,6 @@ public abstract class UpdateHandler implements SolrInfoMBean {
|
|||
public abstract void close() throws IOException;
|
||||
|
||||
|
||||
/**
 * Collector that deletes every collected document through the searcher's IndexReader and counts the deletions
 * in {@link #deleted}.
 */
static class DeleteHitCollector extends Collector {
|
||||
// number of documents deleted so far; incremented after each successful deleteDocument call
public int deleted=0;
|
||||
public final SolrIndexSearcher searcher;
|
||||
// offset taken from the context passed to setNextReader; added to each collected doc number
private int docBase;
|
||||
|
||||
/** Stores the searcher whose IndexReader is used for the deletions. */
public DeleteHitCollector(SolrIndexSearcher searcher) {
|
||||
this.searcher = searcher;
|
||||
}
|
||||
|
||||
/**
 * Deletes document {@code doc + docBase} via the searcher's IndexReader and increments {@link #deleted}.
 * An IOException is rethrown as a SolrException with SERVER_ERROR.
 */
@Override
|
||||
public void collect(int doc) {
|
||||
try {
|
||||
searcher.getIndexReader().deleteDocument(doc + docBase);
|
||||
deleted++;
|
||||
} catch (IOException e) {
|
||||
// don't try to close the searcher on failure for now...
|
||||
// try { closeSearcher(); } catch (Exception ee) { SolrException.log(log,ee); }
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Error deleting doc# "+doc,e,false);
|
||||
}
|
||||
}
|
||||
|
||||
/** Always returns false. */
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Records the docBase of the given reader context for use in collect. */
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
}
|
||||
|
||||
/** Intentionally empty: the scorer is not used. */
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* NOTE: this function is not thread safe. However, it is safe to call within the
|
||||
* <code>inform( SolrCore core )</code> function for <code>SolrCoreAware</code> classes.
|
||||
|
|
|
@ -106,9 +106,9 @@ class LogUpdateProcessor extends UpdateRequestProcessor {
|
|||
}
|
||||
|
||||
if (adds.size() < maxNumToLog) {
|
||||
adds.add(cmd.getPrintableId(req.getSchema()));
|
||||
adds.add(cmd.getPrintableId());
|
||||
}
|
||||
if (logDebug) { log.debug("add {}", cmd.getPrintableId(req.getSchema())); }
|
||||
if (logDebug) { log.debug("add {}", cmd.getPrintableId()); }
|
||||
|
||||
numAdds++;
|
||||
|
||||
|
|
|
@ -57,7 +57,6 @@ class RunUpdateProcessor extends UpdateRequestProcessor
|
|||
|
||||
/**
 * Hands the add command to the update handler and then passes it on through {@code super.processAdd}.
 *
 * @param cmd the add command to run
 * @throws IOException declared for the delegated calls
 */
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
// NOTE(review): this listing is a diff rendered without +/- markers; the hunk header reports one line fewer after
// the change, presumably this cmd.doc assignment (AddUpdateCommand.getLuceneDocument() in the same change performs
// the same DocumentBuilder.toDocument call) - confirm against the commit.
cmd.doc = DocumentBuilder.toDocument(cmd.getSolrInputDocument(), req.getSchema());
|
||||
updateHandler.addDoc(cmd);
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
|
|
|
@ -294,7 +294,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
|
|||
final boolean tombstones = false;
|
||||
|
||||
// query variables
|
||||
final AtomicLong operations = new AtomicLong(10000000); // number of query operations to perform in total // TODO: temporarily high due to lack of stability
|
||||
final AtomicLong operations = new AtomicLong(0); // number of query operations to perform in total // TODO: temporarily high due to lack of stability
|
||||
int nReadThreads = 10;
|
||||
|
||||
initModel(ndocs);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.update;
|
|||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
@ -55,9 +56,9 @@ public class DirectUpdateHandlerOptimizeTest extends AbstractSolrTestCase {
|
|||
//the merge factor is 100 and the maxBufferedDocs is 2, so there should be 50 segments
|
||||
for (int i = 0; i < 99; i++) {
|
||||
// Add a valid document
|
||||
cmd.doc = new Document();
|
||||
cmd.doc.add(new Field("id", "id_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
cmd.doc.add(new Field("subject", "subject_" + i, Field.Store.NO, Field.Index.ANALYZED));
|
||||
cmd.solrDoc = new SolrInputDocument();
|
||||
cmd.solrDoc.addField("id", "id_" + i);
|
||||
cmd.solrDoc.addField("subject", "subject_" + i);
|
||||
updater.addDoc(cmd);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.solr.update;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
@ -50,7 +51,6 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
|
|||
int iter=1000;
|
||||
String iterS = System.getProperty("iter");
|
||||
if (iterS != null) iter=Integer.parseInt(iterS);
|
||||
boolean includeDoc = Boolean.parseBoolean(System.getProperty("includeDoc","true")); // include the time to create the document
|
||||
boolean overwrite = Boolean.parseBoolean(System.getProperty("overwrite","false"));
|
||||
String doc = System.getProperty("doc");
|
||||
if (doc != null) {
|
||||
|
@ -61,14 +61,15 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
|
|||
SolrQueryRequest req = lrf.makeRequest();
|
||||
IndexSchema schema = req.getSchema();
|
||||
UpdateHandler updateHandler = req.getCore().getUpdateHandler();
|
||||
String field = "textgap";
|
||||
|
||||
String[] fields = {"text","simple"
|
||||
,"text","test"
|
||||
,"text","how now brown cow"
|
||||
,"text","what's that?"
|
||||
,"text","radical!"
|
||||
,"text","what's all this about, anyway?"
|
||||
,"text","just how fast is this text indexing?"
|
||||
String[] fields = {field,"simple"
|
||||
,field,"test"
|
||||
,field,"how now brown cow"
|
||||
,field,"what's that?"
|
||||
,field,"radical!"
|
||||
,field,"what's all this about, anyway?"
|
||||
,field,"just how fast is this text indexing?"
|
||||
};
|
||||
|
||||
|
||||
|
@ -91,26 +92,21 @@ public class TestIndexingPerformance extends AbstractSolrTestCase {
|
|||
long start = System.currentTimeMillis();
|
||||
|
||||
AddUpdateCommand add = new AddUpdateCommand(req);
|
||||
|
||||
Field idField=null;
|
||||
add.overwrite = overwrite;
|
||||
|
||||
for (int i=0; i<iter; i++) {
|
||||
if (includeDoc || add.doc==null) {
|
||||
add.doc = new Document();
|
||||
idField = new Field("id","", Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
add.doc.add(idField);
|
||||
for (int j=0; j<fields.length; j+=2) {
|
||||
String field = fields[j];
|
||||
String val = fields[j+1];
|
||||
Fieldable f = schema.getField(field).createField(val, 1.0f);
|
||||
add.doc.add(f);
|
||||
}
|
||||
add.clear();
|
||||
add.solrDoc = new SolrInputDocument();
|
||||
add.solrDoc.addField("id", Integer.toString(i));
|
||||
for (int j=0; j<fields.length; j+=2) {
|
||||
String f = fields[j];
|
||||
String val = fields[j+1];
|
||||
add.solrDoc.addField(f, val);
|
||||
}
|
||||
idField.setValue(Integer.toString(i));
|
||||
updateHandler.addDoc(add);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
log.info("includeDoc="+includeDoc+" doc="+ Arrays.toString(fields));
|
||||
log.info("doc="+ Arrays.toString(fields));
|
||||
log.info("iter="+iter +" time=" + (end-start) + " throughput=" + ((long)iter*1000)/(end-start));
|
||||
|
||||
//discard all the changes
|
||||
|
|
Loading…
Reference in New Issue