From c5118151102a36851ac5430332617c2ca389c6d2 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 12 Apr 2006 21:27:06 +0000 Subject: [PATCH] fix delete-by-id when indexed id != readable id git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@393619 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/org/apache/solr/core/SolrCore.java | 2 +- .../org/apache/solr/schema/FieldType.java | 11 +++++ .../apache/solr/update/AddUpdateCommand.java | 7 ++- .../solr/update/DeleteUpdateCommand.java | 4 +- .../solr/update/DirectUpdateHandler.java | 45 ++++++++++--------- .../solr/update/DirectUpdateHandler2.java | 26 +++++------ .../org/apache/solr/update/UpdateHandler.java | 18 +++++--- 7 files changed, 68 insertions(+), 45 deletions(-) diff --git a/src/java/org/apache/solr/core/SolrCore.java b/src/java/org/apache/solr/core/SolrCore.java index 9c98df17e02..9f5b923f10b 100644 --- a/src/java/org/apache/solr/core/SolrCore.java +++ b/src/java/org/apache/solr/core/SolrCore.java @@ -670,7 +670,7 @@ public final class SolrCore { // that there are multiple docs in the add... so make sure that // objects can handle that. - cmd.id = null; // reset the id for this add + cmd.indexedId = null; // reset the id for this add if (eventType !=0) { eventType=xpp.getEventType(); diff --git a/src/java/org/apache/solr/schema/FieldType.java b/src/java/org/apache/solr/schema/FieldType.java index e2f47f574e8..9952587f9f9 100644 --- a/src/java/org/apache/solr/schema/FieldType.java +++ b/src/java/org/apache/solr/schema/FieldType.java @@ -177,6 +177,17 @@ public abstract class FieldType extends FieldProperties { return indexedForm; } + public String storedToReadable(Field f) { + return toExternal(f); + } + + public String storedToIndexed(Field f) { + // right now, the transformation of single valued fields like SortableInt + // is done when the Field is created, not at analysis time... this means + // that the indexed form is the same as the stored field form. + return f.stringValue(); + } + /********* // default analyzer for non-text fields. diff --git a/src/java/org/apache/solr/update/AddUpdateCommand.java b/src/java/org/apache/solr/update/AddUpdateCommand.java index cca5f07182e..3f009e6613f 100644 --- a/src/java/org/apache/solr/update/AddUpdateCommand.java +++ b/src/java/org/apache/solr/update/AddUpdateCommand.java @@ -22,7 +22,10 @@ import org.apache.lucene.document.Document; * @version $Id$ */ public class AddUpdateCommand extends UpdateCommand { - public String id; + // optional id in "internal" indexed form... if it is needed and not supplied, + // it will be obtained from the doc. + public String indexedId; + public Document doc; public boolean allowDups; public boolean overwritePending; @@ -35,7 +38,7 @@ public class AddUpdateCommand extends UpdateCommand { public String toString() { StringBuilder sb = new StringBuilder(commandName); sb.append(':'); - if (id!=null) sb.append("id=").append(id); + if (indexedId !=null) sb.append("id=").append(indexedId); sb.append(",allowDups=").append(allowDups); sb.append(",overwritePending=").append(overwritePending); sb.append(",overwriteCommitted=").append(overwriteCommitted); diff --git a/src/java/org/apache/solr/update/DeleteUpdateCommand.java b/src/java/org/apache/solr/update/DeleteUpdateCommand.java index 7eeb875b6e5..b13d69d0741 100644 --- a/src/java/org/apache/solr/update/DeleteUpdateCommand.java +++ b/src/java/org/apache/solr/update/DeleteUpdateCommand.java @@ -20,8 +20,8 @@ package org.apache.solr.update; * @version $Id$ */ public class DeleteUpdateCommand extends UpdateCommand { - public String id; - public String query; + public String id; // external (printable) id, for delete-by-id + public String query; // query string for delete-by-query public boolean fromPending; public boolean fromCommitted; diff --git a/src/java/org/apache/solr/update/DirectUpdateHandler.java b/src/java/org/apache/solr/update/DirectUpdateHandler.java index 938e2b01196..fbe1b5f445e 100644 --- a/src/java/org/apache/solr/update/DirectUpdateHandler.java +++ b/src/java/org/apache/solr/update/DirectUpdateHandler.java @@ -23,6 +23,7 @@ package org.apache.solr.update; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.Term; import org.apache.lucene.document.Document; import org.apache.lucene.search.Query; @@ -105,15 +106,16 @@ public class DirectUpdateHandler extends UpdateHandler { writer.addDocument(doc); } - protected boolean existsInIndex(String id) throws IOException { + protected boolean existsInIndex(String indexedId) throws IOException { if (idField == null) throw new SolrException(2,"Operation requires schema to have a unique key field"); - closeWriter(); openSearcher(); + closeWriter(); + openSearcher(); IndexReader ir = searcher.getReader(); TermDocs tdocs = null; boolean exists=false; try { - tdocs = ir.termDocs(idTerm(id)); + tdocs = ir.termDocs(idTerm(indexedId)); if (tdocs.next()) exists=true; } finally { try { if (tdocs != null) tdocs.close(); } catch (Exception e) {} @@ -122,7 +124,7 @@ public class DirectUpdateHandler extends UpdateHandler { } - protected int deleteInIndex(String id) throws IOException { + protected int deleteInIndex(String indexedId) throws IOException { if (idField == null) throw new SolrException(2,"Operation requires schema to have a unique key field"); closeWriter(); openSearcher(); @@ -130,9 +132,10 @@ public class DirectUpdateHandler extends UpdateHandler { TermDocs tdocs = null; int num=0; try { - num = ir.deleteDocuments(idTerm(id)); + Term term = new Term(idField.getName(), indexedId); + num = ir.deleteDocuments(term); if (SolrCore.log.isLoggable(Level.FINEST)) { - SolrCore.log.finest("deleted " + num + " docs matching id " + id); + SolrCore.log.finest("deleted " + num + " docs matching id " + idFieldType.indexedToReadable(indexedId)); } } finally { try { if (tdocs != null) tdocs.close(); } catch (Exception e) {} @@ -140,9 +143,9 @@ public class DirectUpdateHandler extends UpdateHandler { return num; } - protected void overwrite(String id, Document doc) throws IOException { - if (id==null) id=getId(doc); - deleteInIndex(id); + protected void overwrite(String indexedId, Document doc) throws IOException { + if (indexedId ==null) indexedId =getIndexedId(doc); + deleteInIndex(indexedId); doAdd(doc); } @@ -171,10 +174,10 @@ public class DirectUpdateHandler extends UpdateHandler { throw new SolrException(400,"meaningless command: " + cmd); if (!cmd.fromPending || !cmd.fromCommitted) throw new SolrException(400,"operation not supported" + cmd); - + String indexedId = idFieldType.toInternal(cmd.id); synchronized(this) { - deleteInIndex(cmd.id); - pset.remove(cmd.id); + deleteInIndex(indexedId); + pset.remove(indexedId); } } @@ -265,26 +268,26 @@ public class DirectUpdateHandler extends UpdateHandler { /////////////////////////////////////////////////////////////////// protected int addNoOverwriteNoDups(AddUpdateCommand cmd) throws IOException { - if (cmd.id==null) { - cmd.id=getId(cmd.doc); + if (cmd.indexedId ==null) { + cmd.indexedId =getIndexedId(cmd.doc); } synchronized (this) { - if (existsInIndex(cmd.id)) return 0; + if (existsInIndex(cmd.indexedId)) return 0; doAdd(cmd.doc); } return 1; } protected int addConditionally(AddUpdateCommand cmd) throws IOException { - if (cmd.id==null) { - cmd.id=getId(cmd.doc); + if (cmd.indexedId ==null) { + cmd.indexedId =getIndexedId(cmd.doc); } synchronized(this) { - if (pset.contains(cmd.id)) return 0; + if (pset.contains(cmd.indexedId)) return 0; // since case 001 is currently the only case to use pset, only add // to it in that instance. - pset.add(cmd.id); - overwrite(cmd.id,cmd.doc); + pset.add(cmd.indexedId); + overwrite(cmd.indexedId,cmd.doc); return 1; } } @@ -292,7 +295,7 @@ public class DirectUpdateHandler extends UpdateHandler { // overwrite both pending and committed protected synchronized int overwriteBoth(AddUpdateCommand cmd) throws IOException { - overwrite(cmd.id, cmd.doc); + overwrite(cmd.indexedId, cmd.doc); return 1; } diff --git a/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/src/java/org/apache/solr/update/DirectUpdateHandler2.java index 690acce6dfd..5e3375cdc4e 100644 --- a/src/java/org/apache/solr/update/DirectUpdateHandler2.java +++ b/src/java/org/apache/solr/update/DirectUpdateHandler2.java @@ -246,7 +246,7 @@ public class DirectUpdateHandler2 extends UpdateHandler { } synchronized(this) { - pset.put(cmd.id, ZERO); + pset.put(idFieldType.toInternal(cmd.id), ZERO); } } @@ -307,16 +307,16 @@ public class DirectUpdateHandler2 extends UpdateHandler { protected int addConditionally(AddUpdateCommand cmd) throws IOException { - if (cmd.id==null) { - cmd.id=getId(cmd.doc); + if (cmd.indexedId ==null) { + cmd.indexedId =getIndexedId(cmd.doc); } synchronized(this) { - Integer saveCount = pset.get(cmd.id); + Integer saveCount = pset.get(cmd.indexedId); if (saveCount!=null && saveCount!=0) { // a doc with this id already exists in the pending set return 0; } - pset.put(cmd.id, ONE); + pset.put(cmd.indexedId, ONE); doAdd(cmd.doc); return 1; } @@ -325,11 +325,11 @@ public class DirectUpdateHandler2 extends UpdateHandler { // overwrite both pending and committed protected synchronized int overwriteBoth(AddUpdateCommand cmd) throws IOException { - if (cmd.id==null) { - cmd.id=getId(cmd.doc); + if (cmd.indexedId ==null) { + cmd.indexedId =getIndexedId(cmd.doc); } synchronized (this) { - pset.put(cmd.id, ONE); + pset.put(cmd.indexedId, ONE); doAdd(cmd.doc); } return 1; @@ -338,14 +338,14 @@ public class DirectUpdateHandler2 extends UpdateHandler { // add without checking protected synchronized int allowDups(AddUpdateCommand cmd) throws IOException { - if (cmd.id==null) { - cmd.id=getOptId(cmd.doc); + if (cmd.indexedId ==null) { + cmd.indexedId =getIndexedIdOptional(cmd.doc); } synchronized(this) { doAdd(cmd.doc); - if (cmd.id != null) { - Integer saveCount = pset.get(cmd.id); + if (cmd.indexedId != null) { + Integer saveCount = pset.get(cmd.indexedId); // if there weren't any docs marked for deletion before, then don't mark // any for deletion now. @@ -358,7 +358,7 @@ public class DirectUpdateHandler2 extends UpdateHandler { if (saveCount == ZERO) saveCount=ONE; else saveCount++; - pset.put(cmd.id, saveCount); + pset.put(cmd.indexedId, saveCount); } } return 1; diff --git a/src/java/org/apache/solr/update/UpdateHandler.java b/src/java/org/apache/solr/update/UpdateHandler.java index bc92cfa41dc..398f05506ed 100644 --- a/src/java/org/apache/solr/update/UpdateHandler.java +++ b/src/java/org/apache/solr/update/UpdateHandler.java @@ -19,6 +19,7 @@ package org.apache.solr.update; import org.apache.lucene.index.Term; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.search.HitCollector; import org.w3c.dom.NodeList; import org.w3c.dom.Node; @@ -118,22 +119,27 @@ public abstract class UpdateHandler implements SolrInfoMBean { return writer; } - protected final Term idTerm(String id) { + protected final Term idTerm(String readableId) { // to correctly create the Term, the string needs to be run // through the Analyzer for that field. - return new Term(idField.getName(), idFieldType.toInternal(id)); + return new Term(idField.getName(), idFieldType.toInternal(readableId)); } - protected final String getId(Document doc) { + protected final String getIndexedId(Document doc) { if (idField == null) throw new SolrException(400,"Operation requires schema to have a unique key field"); - String id = doc.get(idField.getName()); + // Right now, single valued fields that require value transformation from external to internal (indexed) + // form have that transformation already performed and stored as the field value. + // This means + String id = idFieldType.storedToIndexed(doc.getField(idField.getName())); if (id == null) throw new SolrException(400,"Document is missing uniqueKey field " + idField.getName()); return id; } - protected final String getOptId(Document doc) { + protected final String getIndexedIdOptional(Document doc) { if (idField == null) return null; - return doc.get(idField.getName()); + Field f = doc.getField(idField.getName()); + if (f == null) return null; + return idFieldType.storedToIndexed(f); }