fix delete-by-id when indexed id != readable id

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@393619 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-04-12 21:27:06 +00:00
parent 861222dc75
commit c511815110
7 changed files with 68 additions and 45 deletions

View File

@ -670,7 +670,7 @@ public final class SolrCore {
// that there are multiple docs in the add... so make sure that
// objects can handle that.
cmd.id = null; // reset the id for this add
cmd.indexedId = null; // reset the id for this add
if (eventType !=0) {
eventType=xpp.getEventType();

View File

@ -177,6 +177,17 @@ public abstract class FieldType extends FieldProperties {
return indexedForm;
}
public String storedToReadable(Field f) {
return toExternal(f);
}
public String storedToIndexed(Field f) {
// right now, the transformation of single valued fields like SortableInt
// is done when the Field is created, not at analysis time... this means
// that the indexed form is the same as the stored field form.
return f.stringValue();
}
/*********
// default analyzer for non-text fields.

View File

@ -22,7 +22,10 @@ import org.apache.lucene.document.Document;
* @version $Id$
*/
public class AddUpdateCommand extends UpdateCommand {
public String id;
// optional id in "internal" indexed form... if it is needed and not supplied,
// it will be obtained from the doc.
public String indexedId;
public Document doc;
public boolean allowDups;
public boolean overwritePending;
@ -35,7 +38,7 @@ public class AddUpdateCommand extends UpdateCommand {
public String toString() {
StringBuilder sb = new StringBuilder(commandName);
sb.append(':');
if (id!=null) sb.append("id=").append(id);
if (indexedId !=null) sb.append("id=").append(indexedId);
sb.append(",allowDups=").append(allowDups);
sb.append(",overwritePending=").append(overwritePending);
sb.append(",overwriteCommitted=").append(overwriteCommitted);

View File

@ -20,8 +20,8 @@ package org.apache.solr.update;
* @version $Id$
*/
public class DeleteUpdateCommand extends UpdateCommand {
public String id;
public String query;
public String id; // external (printable) id, for delete-by-id
public String query; // query string for delete-by-query
public boolean fromPending;
public boolean fromCommitted;

View File

@ -23,6 +23,7 @@ package org.apache.solr.update;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
@ -105,15 +106,16 @@ public class DirectUpdateHandler extends UpdateHandler {
writer.addDocument(doc);
}
protected boolean existsInIndex(String id) throws IOException {
protected boolean existsInIndex(String indexedId) throws IOException {
if (idField == null) throw new SolrException(2,"Operation requires schema to have a unique key field");
closeWriter(); openSearcher();
closeWriter();
openSearcher();
IndexReader ir = searcher.getReader();
TermDocs tdocs = null;
boolean exists=false;
try {
tdocs = ir.termDocs(idTerm(id));
tdocs = ir.termDocs(idTerm(indexedId));
if (tdocs.next()) exists=true;
} finally {
try { if (tdocs != null) tdocs.close(); } catch (Exception e) {}
@ -122,7 +124,7 @@ public class DirectUpdateHandler extends UpdateHandler {
}
protected int deleteInIndex(String id) throws IOException {
protected int deleteInIndex(String indexedId) throws IOException {
if (idField == null) throw new SolrException(2,"Operation requires schema to have a unique key field");
closeWriter(); openSearcher();
@ -130,9 +132,10 @@ public class DirectUpdateHandler extends UpdateHandler {
TermDocs tdocs = null;
int num=0;
try {
num = ir.deleteDocuments(idTerm(id));
Term term = new Term(idField.getName(), indexedId);
num = ir.deleteDocuments(term);
if (SolrCore.log.isLoggable(Level.FINEST)) {
SolrCore.log.finest("deleted " + num + " docs matching id " + id);
SolrCore.log.finest("deleted " + num + " docs matching id " + idFieldType.indexedToReadable(indexedId));
}
} finally {
try { if (tdocs != null) tdocs.close(); } catch (Exception e) {}
@ -140,9 +143,9 @@ public class DirectUpdateHandler extends UpdateHandler {
return num;
}
protected void overwrite(String id, Document doc) throws IOException {
if (id==null) id=getId(doc);
deleteInIndex(id);
protected void overwrite(String indexedId, Document doc) throws IOException {
if (indexedId ==null) indexedId =getIndexedId(doc);
deleteInIndex(indexedId);
doAdd(doc);
}
@ -171,10 +174,10 @@ public class DirectUpdateHandler extends UpdateHandler {
throw new SolrException(400,"meaningless command: " + cmd);
if (!cmd.fromPending || !cmd.fromCommitted)
throw new SolrException(400,"operation not supported" + cmd);
String indexedId = idFieldType.toInternal(cmd.id);
synchronized(this) {
deleteInIndex(cmd.id);
pset.remove(cmd.id);
deleteInIndex(indexedId);
pset.remove(indexedId);
}
}
@ -265,26 +268,26 @@ public class DirectUpdateHandler extends UpdateHandler {
///////////////////////////////////////////////////////////////////
protected int addNoOverwriteNoDups(AddUpdateCommand cmd) throws IOException {
if (cmd.id==null) {
cmd.id=getId(cmd.doc);
if (cmd.indexedId ==null) {
cmd.indexedId =getIndexedId(cmd.doc);
}
synchronized (this) {
if (existsInIndex(cmd.id)) return 0;
if (existsInIndex(cmd.indexedId)) return 0;
doAdd(cmd.doc);
}
return 1;
}
protected int addConditionally(AddUpdateCommand cmd) throws IOException {
if (cmd.id==null) {
cmd.id=getId(cmd.doc);
if (cmd.indexedId ==null) {
cmd.indexedId =getIndexedId(cmd.doc);
}
synchronized(this) {
if (pset.contains(cmd.id)) return 0;
if (pset.contains(cmd.indexedId)) return 0;
// since case 001 is currently the only case to use pset, only add
// to it in that instance.
pset.add(cmd.id);
overwrite(cmd.id,cmd.doc);
pset.add(cmd.indexedId);
overwrite(cmd.indexedId,cmd.doc);
return 1;
}
}
@ -292,7 +295,7 @@ public class DirectUpdateHandler extends UpdateHandler {
// overwrite both pending and committed
protected synchronized int overwriteBoth(AddUpdateCommand cmd) throws IOException {
overwrite(cmd.id, cmd.doc);
overwrite(cmd.indexedId, cmd.doc);
return 1;
}

View File

@ -246,7 +246,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
}
synchronized(this) {
pset.put(cmd.id, ZERO);
pset.put(idFieldType.toInternal(cmd.id), ZERO);
}
}
@ -307,16 +307,16 @@ public class DirectUpdateHandler2 extends UpdateHandler {
protected int addConditionally(AddUpdateCommand cmd) throws IOException {
if (cmd.id==null) {
cmd.id=getId(cmd.doc);
if (cmd.indexedId ==null) {
cmd.indexedId =getIndexedId(cmd.doc);
}
synchronized(this) {
Integer saveCount = pset.get(cmd.id);
Integer saveCount = pset.get(cmd.indexedId);
if (saveCount!=null && saveCount!=0) {
// a doc with this id already exists in the pending set
return 0;
}
pset.put(cmd.id, ONE);
pset.put(cmd.indexedId, ONE);
doAdd(cmd.doc);
return 1;
}
@ -325,11 +325,11 @@ public class DirectUpdateHandler2 extends UpdateHandler {
// overwrite both pending and committed
protected synchronized int overwriteBoth(AddUpdateCommand cmd) throws IOException {
if (cmd.id==null) {
cmd.id=getId(cmd.doc);
if (cmd.indexedId ==null) {
cmd.indexedId =getIndexedId(cmd.doc);
}
synchronized (this) {
pset.put(cmd.id, ONE);
pset.put(cmd.indexedId, ONE);
doAdd(cmd.doc);
}
return 1;
@ -338,14 +338,14 @@ public class DirectUpdateHandler2 extends UpdateHandler {
// add without checking
protected synchronized int allowDups(AddUpdateCommand cmd) throws IOException {
if (cmd.id==null) {
cmd.id=getOptId(cmd.doc);
if (cmd.indexedId ==null) {
cmd.indexedId =getIndexedIdOptional(cmd.doc);
}
synchronized(this) {
doAdd(cmd.doc);
if (cmd.id != null) {
Integer saveCount = pset.get(cmd.id);
if (cmd.indexedId != null) {
Integer saveCount = pset.get(cmd.indexedId);
// if there weren't any docs marked for deletion before, then don't mark
// any for deletion now.
@ -358,7 +358,7 @@ public class DirectUpdateHandler2 extends UpdateHandler {
if (saveCount == ZERO) saveCount=ONE;
else saveCount++;
pset.put(cmd.id, saveCount);
pset.put(cmd.indexedId, saveCount);
}
}
return 1;

View File

@ -19,6 +19,7 @@ package org.apache.solr.update;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.HitCollector;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
@ -118,22 +119,27 @@ public abstract class UpdateHandler implements SolrInfoMBean {
return writer;
}
protected final Term idTerm(String id) {
protected final Term idTerm(String readableId) {
// to correctly create the Term, the string needs to be run
// through the Analyzer for that field.
return new Term(idField.getName(), idFieldType.toInternal(id));
return new Term(idField.getName(), idFieldType.toInternal(readableId));
}
protected final String getId(Document doc) {
protected final String getIndexedId(Document doc) {
if (idField == null) throw new SolrException(400,"Operation requires schema to have a unique key field");
String id = doc.get(idField.getName());
// Right now, single valued fields that require value transformation from external to internal (indexed)
// form have that transformation already performed and stored as the field value.
// This means
String id = idFieldType.storedToIndexed(doc.getField(idField.getName()));
if (id == null) throw new SolrException(400,"Document is missing uniqueKey field " + idField.getName());
return id;
}
protected final String getOptId(Document doc) {
protected final String getIndexedIdOptional(Document doc) {
if (idField == null) return null;
return doc.get(idField.getName());
Field f = doc.getField(idField.getName());
if (f == null) return null;
return idFieldType.storedToIndexed(f);
}