mirror of https://github.com/apache/lucene.git
SOLR-2511: slight refactoring to make it easier to override
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102718 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c678de49ee
commit
ed573a067b
|
@ -47,23 +47,23 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class SolrContentHandler extends DefaultHandler implements ExtractingParams {
|
public class SolrContentHandler extends DefaultHandler implements ExtractingParams {
|
||||||
private transient static Logger log = LoggerFactory.getLogger(SolrContentHandler.class);
|
private transient static Logger log = LoggerFactory.getLogger(SolrContentHandler.class);
|
||||||
private SolrInputDocument document;
|
protected SolrInputDocument document;
|
||||||
|
|
||||||
private Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
|
protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
|
||||||
|
|
||||||
private Metadata metadata;
|
protected Metadata metadata;
|
||||||
private SolrParams params;
|
protected SolrParams params;
|
||||||
private StringBuilder catchAllBuilder = new StringBuilder(2048);
|
protected StringBuilder catchAllBuilder = new StringBuilder(2048);
|
||||||
private IndexSchema schema;
|
protected IndexSchema schema;
|
||||||
private Map<String, StringBuilder> fieldBuilders = Collections.emptyMap();
|
protected Map<String, StringBuilder> fieldBuilders = Collections.emptyMap();
|
||||||
private LinkedList<StringBuilder> bldrStack = new LinkedList<StringBuilder>();
|
private LinkedList<StringBuilder> bldrStack = new LinkedList<StringBuilder>();
|
||||||
|
|
||||||
private boolean captureAttribs;
|
protected boolean captureAttribs;
|
||||||
private boolean lowerNames;
|
protected boolean lowerNames;
|
||||||
private String contentFieldName = "content";
|
protected String contentFieldName = "content";
|
||||||
|
|
||||||
private String unknownFieldPrefix = "";
|
protected String unknownFieldPrefix = "";
|
||||||
private String defaultField = "";
|
protected String defaultField = "";
|
||||||
|
|
||||||
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
|
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
|
||||||
this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
|
this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
|
||||||
|
@ -99,16 +99,57 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
|
||||||
* The base implementation adds the metadata as fields, allowing for potential remapping.
|
* The base implementation adds the metadata as fields, allowing for potential remapping.
|
||||||
*
|
*
|
||||||
* @return The {@link org.apache.solr.common.SolrInputDocument}.
|
* @return The {@link org.apache.solr.common.SolrInputDocument}.
|
||||||
|
*
|
||||||
|
* @see #addMetadata()
|
||||||
|
* @see #addCapturedContent()
|
||||||
|
* @see #addContent()
|
||||||
|
* @see #addLiterals()
|
||||||
*/
|
*/
|
||||||
public SolrInputDocument newDocument() {
|
public SolrInputDocument newDocument() {
|
||||||
float boost = 1.0f;
|
float boost = 1.0f;
|
||||||
//handle the metadata extracted from the document
|
//handle the metadata extracted from the document
|
||||||
for (String name : metadata.names()) {
|
addMetadata();
|
||||||
String[] vals = metadata.getValues(name);
|
|
||||||
addField(name, null, vals);
|
|
||||||
}
|
|
||||||
|
|
||||||
//handle the literals from the params
|
//handle the literals from the params
|
||||||
|
addLiterals();
|
||||||
|
|
||||||
|
|
||||||
|
//add in the content
|
||||||
|
addContent();
|
||||||
|
|
||||||
|
//add in the captured content
|
||||||
|
addCapturedContent();
|
||||||
|
|
||||||
|
if (log.isDebugEnabled()) {
|
||||||
|
log.debug("Doc: {}", document);
|
||||||
|
}
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add the per field captured content to the Solr Document. Default implementation uses the
|
||||||
|
* {@link #fieldBuilders} info
|
||||||
|
*/
|
||||||
|
protected void addCapturedContent() {
|
||||||
|
for (Map.Entry<String, StringBuilder> entry : fieldBuilders.entrySet()) {
|
||||||
|
if (entry.getValue().length() > 0) {
|
||||||
|
addField(entry.getKey(), entry.getValue().toString(), null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add in the catch all content to the field. Default impl. uses the {@link #contentFieldName}
|
||||||
|
* and the {@link #catchAllBuilder}
|
||||||
|
*/
|
||||||
|
protected void addContent() {
|
||||||
|
addField(contentFieldName, catchAllBuilder.toString(), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add in the literals to the document using the {@link #params} and the {@link #LITERALS_PREFIX}.
|
||||||
|
*/
|
||||||
|
protected void addLiterals() {
|
||||||
Iterator<String> paramNames = params.getParameterNamesIterator();
|
Iterator<String> paramNames = params.getParameterNamesIterator();
|
||||||
while (paramNames.hasNext()) {
|
while (paramNames.hasNext()) {
|
||||||
String pname = paramNames.next();
|
String pname = paramNames.next();
|
||||||
|
@ -117,28 +158,23 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
|
||||||
String name = pname.substring(LITERALS_PREFIX.length());
|
String name = pname.substring(LITERALS_PREFIX.length());
|
||||||
addField(name, null, params.getParams(pname));
|
addField(name, null, params.getParams(pname));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//add in the content
|
|
||||||
addField(contentFieldName, catchAllBuilder.toString(), null);
|
|
||||||
|
|
||||||
//add in the captured content
|
|
||||||
for (Map.Entry<String, StringBuilder> entry : fieldBuilders.entrySet()) {
|
|
||||||
if (entry.getValue().length() > 0) {
|
|
||||||
addField(entry.getKey(), entry.getValue().toString(), null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add in any metadata using {@link #metadata} as the source.
|
||||||
|
*/
|
||||||
|
protected void addMetadata() {
|
||||||
|
for (String name : metadata.names()) {
|
||||||
|
String[] vals = metadata.getValues(name);
|
||||||
|
addField(name, null, vals);
|
||||||
}
|
}
|
||||||
if (log.isDebugEnabled()) {
|
|
||||||
log.debug("Doc: " + document);
|
|
||||||
}
|
|
||||||
return document;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Naming rules:
|
// Naming rules:
|
||||||
// 1) optionally map names to nicenames (lowercase+underscores)
|
// 1) optionally map names to nicenames (lowercase+underscores)
|
||||||
// 2) execute "map" commands
|
// 2) execute "map" commands
|
||||||
// 3) if resulting field is unknown, map it to a common prefix
|
// 3) if resulting field is unknown, map it to a common prefix
|
||||||
private void addField(String fname, String fval, String[] vals) {
|
protected void addField(String fname, String fval, String[] vals) {
|
||||||
if (lowerNames) {
|
if (lowerNames) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (int i=0; i<fname.length(); i++) {
|
for (int i=0; i<fname.length(); i++) {
|
||||||
|
|
Loading…
Reference in New Issue