SOLR-2200: improve DIH perf for large delta-import updates

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1029325 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-10-31 12:45:07 +00:00
parent 8aaab49058
commit 0dd45398cf
2 changed files with 11 additions and 14 deletions

View File

@ -337,7 +337,8 @@ Optimizations
improvement is 5%, but can be much greater (up to 10x faster) when facet.offset
is very large (deep paging). (yonik)
* SOLR-2200: Improve the performance of DataImportHandler for large delta-import
updates. (Mark Waddle via rmuir)
Bug Fixes
----------------------

View File

@ -841,7 +841,7 @@ public class DocBuilder {
}
// identifying the modified rows for this entity
Set<Map<String, Object>> deltaSet = new HashSet<Map<String, Object>>();
Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
//get the modified rows in this entity
while (true) {
@ -850,7 +850,7 @@ public class DocBuilder {
if (row == null)
break;
deltaSet.add(row);
deltaSet.put(row.get(entity.getPk()).toString(), row);
importStatistics.rowsCount.incrementAndGet();
// check for abort
if (stop.get())
@ -858,33 +858,29 @@ public class DocBuilder {
}
//get the deleted rows for this entity
Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
Set<Map<String, Object>> deltaRemoveSet = new HashSet<Map<String, Object>>();
while (true) {
Map<String, Object> row = entityProcessor.nextDeletedRowKey();
if (row == null)
break;
//Check to see if this delete is in the current delta set
for (Map<String, Object> modifiedRow : deltaSet) {
if (modifiedRow.get(entity.getPk()).equals(row.get(entity.getPk()))) {
deltaRemoveSet.add(modifiedRow);
}
deletedSet.add(row);
// Remove deleted rows from the delta rows
String deletedRowPk = row.get(entity.getPk()).toString();
if (deltaSet.containsKey(deletedRowPk)) {
deltaSet.remove(deletedRowPk);
}
deletedSet.add(row);
importStatistics.rowsCount.incrementAndGet();
// check for abort
if (stop.get())
return new HashSet();
}
//asymmetric Set difference
deltaSet.removeAll(deltaRemoveSet);
LOG.info("Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained : " + deltaSet.size());
LOG.info("Completed DeletedRowKey for Entity: " + entity.name + " rows obtained : " + deletedSet.size());
myModifiedPks.addAll(deltaSet);
myModifiedPks.addAll(deltaSet.values());
Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
//all that we have captured is useless (in a sub-entity) if no rows in the parent are modified because of these
//propagate the changes up the chain