mirror of https://github.com/apache/lucene.git
SOLR-1059 -- New methods in Context and special flags introduced for deleting documents, skipping rows and stopping transforms
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@755897 13f79535-47bb-0310-9956-ffa450edef68
Parent: 54819167be
Commit: 766c0e6c7d
@@ -100,6 +100,12 @@ New Features
 23.SOLR-1061: Improve RegexTransformer to create multiple columns from regex groups.
    (Noble Paul via shalin)

+24.SOLR-1059: Special flags introduced for deleting documents by query or id, skipping rows and stopping further
+   transforms. Use $deleteDocById, $deleteDocByQuery for deleting by id and query respectively.
+   Use $skipRow to skip the current row but continue with the document. Use $stopTransform to stop
+   further transformers. New methods are introduced in Context for deleting by id and query.
+   (Noble Paul, Fergus McMenemie, shalin)
+
 Optimizations
 ----------------------
 1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
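As a rough illustration of how these flags are meant to be used, a custom transformer could attach them to a row; the class name, column names, and trigger conditions below are hypothetical and not part of this commit. Such a transformer would be wired in through the entity's transformer attribute in data-config.xml, like the TemplateTransformer used in the tests further down.

    import java.util.Map;

    import org.apache.solr.handler.dataimport.Context;
    import org.apache.solr.handler.dataimport.Transformer;

    // Hypothetical custom transformer showing how the special flags from the
    // CHANGES entry above are attached to a row.
    public class CleanupTransformer extends Transformer {
      public Object transformRow(Map<String, Object> row, Context context) {
        Object status = row.get("status");
        if ("deleted".equals(status)) {
          // ask DocBuilder to delete the previously indexed document for this id
          row.put("$deleteDocById", row.get("id"));
        } else if ("ignore".equals(status)) {
          // drop this row's fields but keep building the rest of the document
          row.put("$skipRow", "true");
        }
        return row;
      }
    }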
@@ -183,5 +183,13 @@ public abstract class AbstractDataImportHandlerTest extends
     public String getScriptLanguage() {
       return scriptlang == null ? delegate.getScriptLanguage() : scriptlang;
     }
+
+    public void deleteDoc(String id) {
+
+    }
+
+    public void deleteDocByQuery(String query) {
+
+    }
   }
 }
@@ -190,4 +190,16 @@ public abstract class Context {
    * Returns the language of the script as specified in the script tag in data-config.xml
    */
   public abstract String getScriptLanguage();
+
+  /**delete a document by id
+   * @param id
+   */
+  public abstract void deleteDoc(String id);
+
+  /**delete documents by query
+   * @param query
+   */
+  public abstract void deleteDocByQuery(String query);
+
+
 }
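A minimal sketch of a transformer calling the new Context methods directly, instead of setting row flags; the class name, field names, and query are invented for illustration.

    import java.util.Map;

    import org.apache.solr.handler.dataimport.Context;
    import org.apache.solr.handler.dataimport.Transformer;

    // Hypothetical transformer exercising the new Context#deleteDoc and
    // Context#deleteDocByQuery hooks added in this commit.
    public class PurgeTransformer extends Transformer {
      public Object transformRow(Map<String, Object> row, Context context) {
        if (Boolean.parseBoolean(String.valueOf(row.get("purged")))) {
          // delete the stale copy of this record by its unique key
          context.deleteDoc(String.valueOf(row.get("id")));
        } else if (Boolean.parseBoolean(String.valueOf(row.get("purge_group")))) {
          // or remove a whole group of documents with a delete-by-query
          context.deleteDocByQuery("group_s:" + row.get("group"));
        }
        return row;
      }
    }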
@@ -195,4 +195,16 @@ public class ContextImpl extends Context {
     }
     return null;
   }
+
+  public void deleteDoc(String id) {
+    if(docBuilder != null){
+      docBuilder.writer.deleteDoc(id);
+    }
+  }
+
+  public void deleteDocByQuery(String query) {
+    if(docBuilder != null){
+      docBuilder.writer.deleteByQuery(query);
+    }
+  }
 }
@@ -104,16 +104,11 @@ public class DataImporter {
     for (DataConfig.Entity e : config.document.entities) {
       Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
       initEntity(e, fields, false);
-      String errs = verifyWithSchema(fields);
-      if (errs != null) {
-        throw new DataImportHandlerException(
-                DataImportHandlerException.SEVERE, errs);
-      }
+      verifyWithSchema(fields);
     }
   }

-  private String verifyWithSchema(Map<String, DataConfig.Field> fields) {
-    List<String> errors = new ArrayList<String>();
+  private void verifyWithSchema(Map<String, DataConfig.Field> fields) {
     Map<String, SchemaField> schemaFields = schema.getFields();
     for (Map.Entry<String, SchemaField> entry : schemaFields.entrySet()) {
       SchemaField sf = entry.getValue();
@@ -131,21 +126,11 @@ public class DataImporter {
       if (field == null) {
         field = config.lowerNameVsSchemaField.get(fld.getName().toLowerCase());
         if (field == null) {
-          errors.add("The field :" + fld.getName() + " present in DataConfig does not have a counterpart in Solr Schema");
+          LOG.info("The field :" + fld.getName() + " present in DataConfig does not have a counterpart in Solr Schema");
         }
       }
     }

-    if (!errors.isEmpty()) {
-      StringBuilder sb = new StringBuilder("There are errors in the Schema\n");
-      for (String error : errors) {
-        sb.append(error).append("\n");
-      }
-      return sb.toString();
-    }
-    return null;
-
   }

   /**
@@ -370,10 +370,12 @@ public class DocBuilder {
       if (entity.isDocRoot) {
         if (stop.get())
           return;
-        boolean result = writer.upload(doc);
-        doc = null;
-        if (result)
-          importStatistics.docCount.incrementAndGet();
+        if (!doc.isEmpty()) {
+          boolean result = writer.upload(doc);
+          doc = null;
+          if (result)
+            importStatistics.docCount.incrementAndGet();
+        }
       }

     } catch (DataImportHandlerException e) {
@@ -424,9 +426,32 @@ public class DocBuilder {

   @SuppressWarnings("unchecked")
   private void addFields(DataConfig.Entity entity, SolrInputDocument doc, Map<String, Object> arow) {
+    Object s = arow.get("$skipRow");
+    if (s != null && Boolean.parseBoolean(s.toString())) {
+      return;
+    }
     for (Map.Entry<String, Object> entry : arow.entrySet()) {
       String key = entry.getKey();
       Object value = entry.getValue();
       if (value == null) continue;
       if (key.startsWith("$")) {
+        if ("$deleteDocById".equals(key)) {
+          if (value instanceof Collection) {
+            Collection collection = (Collection) value;
+            for (Object o : collection) {
+              writer.deleteDoc(o.toString());
+            }
+          } else {
+            writer.deleteDoc(value);
+          }
+        }
+        if ("$deleteDocByQuery".equals(key)) {
+          writer.deleteByQuery(entry.getValue().toString());
+        }
+        if ("$skipDoc".equals(key) && Boolean.parseBoolean(value.toString())) {
+          throw new DataImportHandlerException(DataImportHandlerException.SKIP,
+                  "Document skipped :" + arow);
+        }
         // All fields starting with $ are special values and don't need to be added
         continue;
       }
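For reference, a sketch of the kind of row that exercises the branches above; the ids, field names, and query are invented. Given such a row, addFields() issues one deleteDoc call per element of the Collection, one delete-by-query, and none of the $-prefixed keys reach the SolrInputDocument, while ordinary fields are still indexed.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;

    public class DeleteRowSketch {
      // Returns a row map of the shape addFields() receives from an entity
      // processor or transformer; the values here are illustrative only.
      public static Map<String, Object> deleteRow() {
        Map<String, Object> arow = new HashMap<String, Object>();
        arow.put("$deleteDocById", Arrays.asList("101", "102")); // two deletes by id
        arow.put("$deleteDocByQuery", "expired_b:true");         // plus a delete-by-query
        arow.put("last_modified", "2009-03-19");                 // ordinary field, still added to the doc
        return arow;
      }
    }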
@@ -160,7 +160,9 @@ public class EntityProcessorBase extends EntityProcessor {
       return row;
     Map<String, Object> transformedRow = row;
     List<Map<String, Object>> rows = null;
+    boolean stopTransform = checkStopTransform(row);
     for (Transformer t : transformers) {
+      if(stopTransform) break;
       try {
         if (rows != null) {
           List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
@@ -171,7 +173,7 @@ public class EntityProcessorBase extends EntityProcessor {
               continue;
             if (o instanceof Map) {
               Map oMap = (Map) o;
-              checkSkipDoc(oMap, t);
+              stopTransform = checkStopTransform(oMap);
               tmpRows.add((Map) o);
             } else if (o instanceof List) {
               tmpRows.addAll((List) o);
@@ -187,7 +189,7 @@ public class EntityProcessorBase extends EntityProcessor {
             return null;
           if (o instanceof Map) {
             Map oMap = (Map) o;
-            checkSkipDoc(oMap, t);
+            stopTransform = checkStopTransform(oMap);
             transformedRow = (Map) o;
           } else if (o instanceof List) {
             rows = (List) o;
@@ -214,11 +216,9 @@ public class EntityProcessorBase extends EntityProcessor {

   }

-  private void checkSkipDoc(Map oMap, Transformer t) {
-    if (oMap.get(SKIP_DOC) != null
-            && Boolean.parseBoolean(oMap.get(SKIP_DOC).toString()))
-      throw new DataImportHandlerException(DataImportHandlerException.SKIP,
-              "Document skipped by: " + DebugLogger.getTransformerName(t));
+  private boolean checkStopTransform(Map oMap) {
+    return oMap.get("$stopTransform") != null
+            && Boolean.parseBoolean(oMap.get("$stopTransform").toString());
   }

   protected Map<String, Object> getNext() {
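A sketch of a transformer that raises the new flag; once checkStopTransform() sees it, the transform loop above breaks before the transformers later in the chain are applied to that row. The class name and trigger condition are invented.

    import java.util.Map;

    import org.apache.solr.handler.dataimport.Context;
    import org.apache.solr.handler.dataimport.Transformer;

    // Hypothetical first transformer in a chain. Setting $stopTransform tells
    // EntityProcessorBase not to run the remaining transformers on this row.
    public class GateTransformer extends Transformer {
      public Object transformRow(Map<String, Object> row, Context context) {
        if ("raw".equals(row.get("format"))) {
          row.put("$stopTransform", "true");
        }
        return row;
      }
    }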
@@ -120,6 +120,70 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
     super.runFullImport(loadDataConfig("data-config-with-transformer.xml"));
   }

+  @Test
+  @SuppressWarnings("unchecked")
+  public void testSkipDoc() throws Exception {
+    List rows = new ArrayList();
+    rows.add(createMap("id", "1", "desc", "one"));
+    rows.add(createMap("id", "2", "desc", "two", "$skipDoc", "true"));
+    MockDataSource.setIterator("select * from x", rows.iterator());
+
+    super.runFullImport(dataConfigWithDynamicTransformer);
+
+    assertQ(req("id:1"), "//*[@numFound='1']");
+    assertQ(req("id:2"), "//*[@numFound='0']");
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testSkipRow() throws Exception {
+    List rows = new ArrayList();
+    rows.add(createMap("id", "1", "desc", "one"));
+    rows.add(createMap("id", "2", "desc", "two", "$skipRow", "true"));
+    MockDataSource.setIterator("select * from x", rows.iterator());
+
+    super.runFullImport(dataConfigWithDynamicTransformer);
+
+    assertQ(req("id:1"), "//*[@numFound='1']");
+    assertQ(req("id:2"), "//*[@numFound='0']");
+
+    MockDataSource.clearCache();
+
+    rows = new ArrayList();
+    rows.add(createMap("id", "3", "desc", "one"));
+    rows.add(createMap("id", "4", "desc", "two"));
+    MockDataSource.setIterator("select * from x", rows.iterator());
+
+    rows = new ArrayList();
+    rows.add(createMap("name_s", "abcd"));
+    MockDataSource.setIterator("3", rows.iterator());
+
+    rows = new ArrayList();
+    rows.add(createMap("name_s", "xyz", "$skipRow", "true"));
+    MockDataSource.setIterator("4", rows.iterator());
+
+    super.runFullImport(dataConfigWithTwoEntities);
+    assertQ(req("id:3"), "//*[@numFound='1']");
+    assertQ(req("id:4"), "//*[@numFound='1']");
+    assertQ(req("name_s:abcd"), "//*[@numFound='1']");
+    assertQ(req("name_s:xyz"), "//*[@numFound='0']");
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testStopTransform() throws Exception {
+    List rows = new ArrayList();
+    rows.add(createMap("id", "1", "desc", "one"));
+    rows.add(createMap("id", "2", "desc", "two", "$stopTransform", "true"));
+    MockDataSource.setIterator("select * from x", rows.iterator());
+
+    super.runFullImport(dataConfigForSkipTransform);
+
+    assertQ(req("id:1"), "//*[@numFound='1']");
+    assertQ(req("id:2"), "//*[@numFound='1']");
+    assertQ(req("name_s:xyz"), "//*[@numFound='1']");
+  }
+
   public static class MockTransformer extends Transformer {
     public Object transformRow(Map<String, Object> row, Context context) {
       Assert.assertTrue("Context gave incorrect data source", context.getDataSource("mockDs") instanceof MockDataSource2);
@@ -175,6 +239,29 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
           "  </document>\n" +
           "</dataConfig>";

+  private final String dataConfigForSkipTransform = "<dataConfig>\n" +
+          "  <document>\n" +
+          "    <entity name=\"books\" query=\"select * from x\"" +
+          "        transformer=\"TemplateTransformer\">\n" +
+          "      <field column=\"id\" />\n" +
+          "      <field column=\"desc\" />\n" +
+          "      <field column=\"name_s\" template=\"xyz\" />\n" +
+          "    </entity>\n" +
+          "  </document>\n" +
+          "</dataConfig>";
+
+  private final String dataConfigWithTwoEntities = "<dataConfig>\n" +
+          "  <document>\n" +
+          "    <entity name=\"books\" query=\"select * from x\">" +
+          "      <field column=\"id\" />\n" +
+          "      <field column=\"desc\" />\n" +
+          "      <entity name=\"authors\" query=\"${books.id}\">" +
+          "        <field column=\"name_s\" />" +
+          "      </entity>" +
+          "    </entity>\n" +
+          "  </document>\n" +
+          "</dataConfig>";
+
   private final String dataConfigWithCaseInsensitiveFields = "<dataConfig>\n" +
           "  <document onImportStart=\"TestDocBuilder2$StartEventListener\" onImportEnd=\"TestDocBuilder2$EndEventListener\">\n" +
           "    <entity name=\"books\" query=\"select * from x\">\n" +