mirror of https://github.com/apache/lucene.git
SOLR-2200: improve DIH perf for large delta-import updates
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1029325 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8aaab49058
commit
0dd45398cf
|
@ -337,7 +337,8 @@ Optimizations
|
|||
improvement is 5%, but can be much greater (up to 10x faster) when facet.offset
|
||||
is very large (deep paging). (yonik)
|
||||
|
||||
|
||||
* SOLR-2200: Improve the performance of DataImportHandler for large delta-import
|
||||
updates. (Mark Waddle via rmuir)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
|
|
@ -841,7 +841,7 @@ public class DocBuilder {
|
|||
}
|
||||
// identifying the modified rows for this entity
|
||||
|
||||
Set<Map<String, Object>> deltaSet = new HashSet<Map<String, Object>>();
|
||||
Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
|
||||
LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
|
||||
//get the modified rows in this entity
|
||||
while (true) {
|
||||
|
@ -850,7 +850,7 @@ public class DocBuilder {
|
|||
if (row == null)
|
||||
break;
|
||||
|
||||
deltaSet.add(row);
|
||||
deltaSet.put(row.get(entity.getPk()).toString(), row);
|
||||
importStatistics.rowsCount.incrementAndGet();
|
||||
// check for abort
|
||||
if (stop.get())
|
||||
|
@ -858,33 +858,29 @@ public class DocBuilder {
|
|||
}
|
||||
//get the deleted rows for this entity
|
||||
Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
|
||||
Set<Map<String, Object>> deltaRemoveSet = new HashSet<Map<String, Object>>();
|
||||
while (true) {
|
||||
Map<String, Object> row = entityProcessor.nextDeletedRowKey();
|
||||
if (row == null)
|
||||
break;
|
||||
|
||||
//Check to see if this delete is in the current delta set
|
||||
for (Map<String, Object> modifiedRow : deltaSet) {
|
||||
if (modifiedRow.get(entity.getPk()).equals(row.get(entity.getPk()))) {
|
||||
deltaRemoveSet.add(modifiedRow);
|
||||
}
|
||||
deletedSet.add(row);
|
||||
|
||||
// Remove deleted rows from the delta rows
|
||||
String deletedRowPk = row.get(entity.getPk()).toString();
|
||||
if (deltaSet.containsKey(deletedRowPk)) {
|
||||
deltaSet.remove(deletedRowPk);
|
||||
}
|
||||
|
||||
deletedSet.add(row);
|
||||
importStatistics.rowsCount.incrementAndGet();
|
||||
// check for abort
|
||||
if (stop.get())
|
||||
return new HashSet();
|
||||
}
|
||||
|
||||
//asymmetric Set difference
|
||||
deltaSet.removeAll(deltaRemoveSet);
|
||||
|
||||
LOG.info("Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained : " + deltaSet.size());
|
||||
LOG.info("Completed DeletedRowKey for Entity: " + entity.name + " rows obtained : " + deletedSet.size());
|
||||
|
||||
myModifiedPks.addAll(deltaSet);
|
||||
myModifiedPks.addAll(deltaSet.values());
|
||||
Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
|
||||
//all that we have captured is useless (in a sub-entity) if no rows in the parent is modified because of these
|
||||
//propogate up the changes in the chain
|
||||
|
|
Loading…
Reference in New Issue