mirror of
https://github.com/apache/lucene.git
synced 2025-02-12 13:05:29 +00:00
SOLR-2200: improve DIH perf for large delta-import updates
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1029325 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8aaab49058
commit
0dd45398cf
@ -337,7 +337,8 @@ Optimizations
|
|||||||
improvement is 5%, but can be much greater (up to 10x faster) when facet.offset
|
improvement is 5%, but can be much greater (up to 10x faster) when facet.offset
|
||||||
is very large (deep paging). (yonik)
|
is very large (deep paging). (yonik)
|
||||||
|
|
||||||
|
* SOLR-2200: Improve the performance of DataImportHandler for large delta-import
|
||||||
|
updates. (Mark Waddle via rmuir)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
@ -841,7 +841,7 @@ public class DocBuilder {
|
|||||||
}
|
}
|
||||||
// identifying the modified rows for this entity
|
// identifying the modified rows for this entity
|
||||||
|
|
||||||
Set<Map<String, Object>> deltaSet = new HashSet<Map<String, Object>>();
|
Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
|
||||||
LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
|
LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
|
||||||
//get the modified rows in this entity
|
//get the modified rows in this entity
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -850,7 +850,7 @@ public class DocBuilder {
|
|||||||
if (row == null)
|
if (row == null)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
deltaSet.add(row);
|
deltaSet.put(row.get(entity.getPk()).toString(), row);
|
||||||
importStatistics.rowsCount.incrementAndGet();
|
importStatistics.rowsCount.incrementAndGet();
|
||||||
// check for abort
|
// check for abort
|
||||||
if (stop.get())
|
if (stop.get())
|
||||||
@ -858,33 +858,29 @@ public class DocBuilder {
|
|||||||
}
|
}
|
||||||
//get the deleted rows for this entity
|
//get the deleted rows for this entity
|
||||||
Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
|
Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
|
||||||
Set<Map<String, Object>> deltaRemoveSet = new HashSet<Map<String, Object>>();
|
|
||||||
while (true) {
|
while (true) {
|
||||||
Map<String, Object> row = entityProcessor.nextDeletedRowKey();
|
Map<String, Object> row = entityProcessor.nextDeletedRowKey();
|
||||||
if (row == null)
|
if (row == null)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
//Check to see if this delete is in the current delta set
|
deletedSet.add(row);
|
||||||
for (Map<String, Object> modifiedRow : deltaSet) {
|
|
||||||
if (modifiedRow.get(entity.getPk()).equals(row.get(entity.getPk()))) {
|
// Remove deleted rows from the delta rows
|
||||||
deltaRemoveSet.add(modifiedRow);
|
String deletedRowPk = row.get(entity.getPk()).toString();
|
||||||
}
|
if (deltaSet.containsKey(deletedRowPk)) {
|
||||||
|
deltaSet.remove(deletedRowPk);
|
||||||
}
|
}
|
||||||
|
|
||||||
deletedSet.add(row);
|
|
||||||
importStatistics.rowsCount.incrementAndGet();
|
importStatistics.rowsCount.incrementAndGet();
|
||||||
// check for abort
|
// check for abort
|
||||||
if (stop.get())
|
if (stop.get())
|
||||||
return new HashSet();
|
return new HashSet();
|
||||||
}
|
}
|
||||||
|
|
||||||
//asymmetric Set difference
|
|
||||||
deltaSet.removeAll(deltaRemoveSet);
|
|
||||||
|
|
||||||
LOG.info("Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained : " + deltaSet.size());
|
LOG.info("Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained : " + deltaSet.size());
|
||||||
LOG.info("Completed DeletedRowKey for Entity: " + entity.name + " rows obtained : " + deletedSet.size());
|
LOG.info("Completed DeletedRowKey for Entity: " + entity.name + " rows obtained : " + deletedSet.size());
|
||||||
|
|
||||||
myModifiedPks.addAll(deltaSet);
|
myModifiedPks.addAll(deltaSet.values());
|
||||||
Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
|
Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
|
||||||
//all that we have captured is useless (in a sub-entity) if no rows in the parent are modified because of these;
|
//all that we have captured is useless (in a sub-entity) if no rows in the parent are modified because of these;
|
||||||
//propagate the changes up the chain
|
//propagate the changes up the chain
|
||||||
|
Loading…
x
Reference in New Issue
Block a user