SOLR-1059 -- New methods in Context and special flags introduced for deleting documents, skipping rows and stopping transforms

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@755897 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2009-03-19 10:27:33 +00:00
parent 54819167be
commit 766c0e6c7d
8 changed files with 164 additions and 29 deletions

View File

@ -100,6 +100,12 @@ New Features
23.SOLR-1061: Improve RegexTransformer to create multiple columns from regex groups.
(Noble Paul via shalin)
24.SOLR-1059: Special flags introduced for deleting documents by query or id, skipping rows and stopping further
transforms. Use $deleteDocById, $deleteDocByQuery for deleting by id and query respectively.
Use $skipRow to skip the current row but continue with the document. Use $stopTransform to stop
further transformers. New methods are introduced in Context for deleting by id and query.
(Noble Paul, Fergus McMenemie, shalin)
Optimizations
----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used

View File

@ -183,5 +183,13 @@ public abstract class AbstractDataImportHandlerTest extends
public String getScriptLanguage() {
return scriptlang == null ? delegate.getScriptLanguage() : scriptlang;
}
public void deleteDoc(String id) {
}
public void deleteDocByQuery(String query) {
}
}
}

View File

@ -190,4 +190,16 @@ public abstract class Context {
* Returns the language of the script as specified in the script tag in data-config.xml
*/
public abstract String getScriptLanguage();
/**delete a document by id
* @param id
*/
public abstract void deleteDoc(String id);
/**delete documents by query
* @param query
*/
public abstract void deleteDocByQuery(String query);
}

View File

@ -195,4 +195,16 @@ public class ContextImpl extends Context {
}
return null;
}
public void deleteDoc(String id) {
if(docBuilder != null){
docBuilder.writer.deleteDoc(id);
}
}
public void deleteDocByQuery(String query) {
if(docBuilder != null){
docBuilder.writer.deleteByQuery(query);
}
}
}

View File

@ -104,16 +104,11 @@ public class DataImporter {
for (DataConfig.Entity e : config.document.entities) {
Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
initEntity(e, fields, false);
String errs = verifyWithSchema(fields);
if (errs != null) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, errs);
}
verifyWithSchema(fields);
}
}
private String verifyWithSchema(Map<String, DataConfig.Field> fields) {
List<String> errors = new ArrayList<String>();
private void verifyWithSchema(Map<String, DataConfig.Field> fields) {
Map<String, SchemaField> schemaFields = schema.getFields();
for (Map.Entry<String, SchemaField> entry : schemaFields.entrySet()) {
SchemaField sf = entry.getValue();
@ -131,21 +126,11 @@ public class DataImporter {
if (field == null) {
field = config.lowerNameVsSchemaField.get(fld.getName().toLowerCase());
if (field == null) {
errors.add("The field :" + fld.getName() + " present in DataConfig does not have a counterpart in Solr Schema");
LOG.info("The field :" + fld.getName() + " present in DataConfig does not have a counterpart in Solr Schema");
}
}
}
if (!errors.isEmpty()) {
StringBuilder sb = new StringBuilder("There are errors in the Schema\n");
for (String error : errors) {
sb.append(error).append("\n");
}
return sb.toString();
}
return null;
}
/**

View File

@ -370,10 +370,12 @@ public class DocBuilder {
if (entity.isDocRoot) {
if (stop.get())
return;
boolean result = writer.upload(doc);
doc = null;
if (result)
importStatistics.docCount.incrementAndGet();
if (!doc.isEmpty()) {
boolean result = writer.upload(doc);
doc = null;
if (result)
importStatistics.docCount.incrementAndGet();
}
}
} catch (DataImportHandlerException e) {
@ -424,9 +426,32 @@ public class DocBuilder {
@SuppressWarnings("unchecked")
private void addFields(DataConfig.Entity entity, SolrInputDocument doc, Map<String, Object> arow) {
Object s = arow.get("$skipRow");
if (s != null && Boolean.parseBoolean(s.toString())) {
return;
}
for (Map.Entry<String, Object> entry : arow.entrySet()) {
String key = entry.getKey();
Object value = entry.getValue();
if (value == null) continue;
if (key.startsWith("$")) {
if ("$deleteDocById".equals(key)) {
if (value instanceof Collection) {
Collection collection = (Collection) value;
for (Object o : collection) {
writer.deleteDoc(o.toString());
}
} else {
writer.deleteDoc(value);
}
}
if ("$deleteDocByQuery".equals(key)) {
writer.deleteByQuery(entry.getValue().toString());
}
if ("$skipDoc".equals(key) && Boolean.parseBoolean(value.toString())) {
throw new DataImportHandlerException(DataImportHandlerException.SKIP,
"Document skipped :" + arow);
}
// All fields starting with $ are special values and don't need to be added
continue;
}

View File

@ -160,7 +160,9 @@ public class EntityProcessorBase extends EntityProcessor {
return row;
Map<String, Object> transformedRow = row;
List<Map<String, Object>> rows = null;
boolean stopTransform = checkStopTransform(row);
for (Transformer t : transformers) {
if(stopTransform) break;
try {
if (rows != null) {
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
@ -171,7 +173,7 @@ public class EntityProcessorBase extends EntityProcessor {
continue;
if (o instanceof Map) {
Map oMap = (Map) o;
checkSkipDoc(oMap, t);
stopTransform = checkStopTransform(oMap);
tmpRows.add((Map) o);
} else if (o instanceof List) {
tmpRows.addAll((List) o);
@ -187,7 +189,7 @@ public class EntityProcessorBase extends EntityProcessor {
return null;
if (o instanceof Map) {
Map oMap = (Map) o;
checkSkipDoc(oMap, t);
stopTransform = checkStopTransform(oMap);
transformedRow = (Map) o;
} else if (o instanceof List) {
rows = (List) o;
@ -214,11 +216,9 @@ public class EntityProcessorBase extends EntityProcessor {
}
private void checkSkipDoc(Map oMap, Transformer t) {
if (oMap.get(SKIP_DOC) != null
&& Boolean.parseBoolean(oMap.get(SKIP_DOC).toString()))
throw new DataImportHandlerException(DataImportHandlerException.SKIP,
"Document skipped by: " + DebugLogger.getTransformerName(t));
private boolean checkStopTransform(Map oMap) {
return oMap.get("$stopTransform") != null
&& Boolean.parseBoolean(oMap.get("$stopTransform").toString());
}
protected Map<String, Object> getNext() {

View File

@ -120,6 +120,70 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
super.runFullImport(loadDataConfig("data-config-with-transformer.xml"));
}
@Test
@SuppressWarnings("unchecked")
public void testSkipDoc() throws Exception {
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
rows.add(createMap("id", "2", "desc", "two", "$skipDoc", "true"));
MockDataSource.setIterator("select * from x", rows.iterator());
super.runFullImport(dataConfigWithDynamicTransformer);
assertQ(req("id:1"), "//*[@numFound='1']");
assertQ(req("id:2"), "//*[@numFound='0']");
}
@Test
@SuppressWarnings("unchecked")
public void testSkipRow() throws Exception {
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
rows.add(createMap("id", "2", "desc", "two", "$skipRow", "true"));
MockDataSource.setIterator("select * from x", rows.iterator());
super.runFullImport(dataConfigWithDynamicTransformer);
assertQ(req("id:1"), "//*[@numFound='1']");
assertQ(req("id:2"), "//*[@numFound='0']");
MockDataSource.clearCache();
rows = new ArrayList();
rows.add(createMap("id", "3", "desc", "one"));
rows.add(createMap("id", "4", "desc", "two"));
MockDataSource.setIterator("select * from x", rows.iterator());
rows = new ArrayList();
rows.add(createMap("name_s", "abcd"));
MockDataSource.setIterator("3", rows.iterator());
rows = new ArrayList();
rows.add(createMap("name_s", "xyz", "$skipRow", "true"));
MockDataSource.setIterator("4", rows.iterator());
super.runFullImport(dataConfigWithTwoEntities);
assertQ(req("id:3"), "//*[@numFound='1']");
assertQ(req("id:4"), "//*[@numFound='1']");
assertQ(req("name_s:abcd"), "//*[@numFound='1']");
assertQ(req("name_s:xyz"), "//*[@numFound='0']");
}
@Test
@SuppressWarnings("unchecked")
public void testStopTransform() throws Exception {
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
rows.add(createMap("id", "2", "desc", "two", "$stopTransform", "true"));
MockDataSource.setIterator("select * from x", rows.iterator());
super.runFullImport(dataConfigForSkipTransform);
assertQ(req("id:1"), "//*[@numFound='1']");
assertQ(req("id:2"), "//*[@numFound='1']");
assertQ(req("name_s:xyz"), "//*[@numFound='1']");
}
public static class MockTransformer extends Transformer {
public Object transformRow(Map<String, Object> row, Context context) {
Assert.assertTrue("Context gave incorrect data source", context.getDataSource("mockDs") instanceof MockDataSource2);
@ -175,6 +239,29 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
" </document>\n" +
"</dataConfig>";
private final String dataConfigForSkipTransform = "<dataConfig>\n" +
" <document>\n" +
" <entity name=\"books\" query=\"select * from x\"" +
" transformer=\"TemplateTransformer\">\n" +
" <field column=\"id\" />\n" +
" <field column=\"desc\" />\n" +
" <field column=\"name_s\" template=\"xyz\" />\n" +
" </entity>\n" +
" </document>\n" +
"</dataConfig>";
private final String dataConfigWithTwoEntities = "<dataConfig>\n" +
" <document>\n" +
" <entity name=\"books\" query=\"select * from x\">" +
" <field column=\"id\" />\n" +
" <field column=\"desc\" />\n" +
" <entity name=\"authors\" query=\"${books.id}\">" +
" <field column=\"name_s\" />" +
" </entity>" +
" </entity>\n" +
" </document>\n" +
"</dataConfig>";
private final String dataConfigWithCaseInsensitiveFields = "<dataConfig>\n" +
" <document onImportStart=\"TestDocBuilder2$StartEventListener\" onImportEnd=\"TestDocBuilder2$EndEventListener\">\n" +
" <entity name=\"books\" query=\"select * from x\">\n" +