SOLR-284: Add defaultField capability

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@815293 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2009-09-15 12:31:11 +00:00
parent 91e071d18e
commit d8bf83fe56
4 changed files with 55 additions and 1 deletions

View File

@ -133,4 +133,10 @@ public interface ExtractingParams {
* to setup a dynamic field to automatically capture it
*/
public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
/**
* Optional. If specified and the name of a potential field cannot be determined, the default Field specified
* will be used instead.
*/
public static final String DEFAULT_FIELD = "defaultField";
}

View File

@ -64,7 +64,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
private String contentFieldName = "content";
private String unknownFieldPrefix = "";
private String defaultField = "";
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
@ -82,6 +82,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
this.lowerNames = params.getBool(LOWERNAMES, false);
this.captureAttribs = params.getBool(CAPTURE_ATTRIBUTES, false);
this.unknownFieldPrefix = params.get(UNKNOWN_FIELD_PREFIX, "");
this.defaultField = params.get(DEFAULT_FIELD, "");
String[] captureFields = params.getParams(CAPTURE_ELEMENTS);
if (captureFields != null && captureFields.length > 0) {
fieldBuilders = new HashMap<String, StringBuilder>();
@ -155,6 +156,9 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
if (sf==null && unknownFieldPrefix.length() > 0) {
name = unknownFieldPrefix + name;
sf = schema.getFieldOrNull(name);
} else if (sf == null && defaultField.length() > 0 && name.equals(Metadata.RESOURCE_NAME_KEY) == false /*let the fall through below handle this*/){
name = defaultField;
sf = schema.getFieldOrNull(name);
}
// Arguably we should handle this as a special case. Why? Because unlike basically

View File

@ -134,6 +134,49 @@ public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
}
public void testDefaultField() throws Exception {
ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
assertTrue("handler is null and it shouldn't be", handler != null);
try {
loadLocal("simple.html",
"literal.id","simple2",
"lowernames", "true",
"captureAttr", "true",
//"map.content_type", "abcxyz",
"commit", "true" // test immediate commit
);
assertTrue(false);
} catch (SolrException e) {
//do nothing
}
loadLocal("simple.html",
"literal.id","simple2",
ExtractingParams.DEFAULT_FIELD, "defaultExtr",//test that unmapped fields go to the text field when no uprefix is specified
"lowernames", "true",
"captureAttr", "true",
//"map.content_type", "abcxyz",
"commit", "true" // test immediate commit
);
assertQ(req("id:simple2"), "//*[@numFound='1']");
assertQ(req("defaultExtr:http\\://www.apache.org"), "//*[@numFound='1']");
//Test when both uprefix and default are specified.
loadLocal("simple.html",
"literal.id","simple2",
ExtractingParams.DEFAULT_FIELD, "defaultExtr",//test that unmapped fields go to the text field when no uprefix is specified
ExtractingParams.UNKNOWN_FIELD_PREFIX, "t_",
"lowernames", "true",
"captureAttr", "true",
"map.a","t_href",
//"map.content_type", "abcxyz",
"commit", "true" // test immediate commit
);
assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']");
}
public void testLiterals() throws Exception {
ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");

View File

@ -405,6 +405,7 @@
<field name="extractionLiteralMV" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="extractionLiteral" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="defaultExtr" type="string" indexed="true" stored="false" />
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.