diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 41e0111ff3d..b036fe1b587 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -160,6 +160,9 @@ New Features
& json.facet={ colors:{type:terms, field:color, excludeTags=COLOR} }
(yonik)
+* SOLR-7231: DIH-TikaEntityprocessor, create lat-lon field from Metadata
+ (Tim Allison via Noble Paul)
+
Bug Fixes
----------------------
diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
index fc16113b2c7..fb8cdb81287 100644
--- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
+++ b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
@@ -45,6 +45,7 @@ import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
@@ -55,6 +56,10 @@ import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
*
An implementation of {@link EntityProcessor} which reads data from rich docs
* using Apache Tika
*
+ *
To index latitude/longitude data that might
+ * be extracted from a file's metadata, identify
+ * the geo field for this information with this attribute:
+ * spatialMetadataField
*
* @since solr 3.1
*/
@@ -67,6 +72,7 @@ public class TikaEntityProcessor extends EntityProcessorBase {
private String parser;
static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";
private String htmlMapper;
+ private String spatialMetadataField;
@Override
public void init(Context context) {
@@ -113,6 +119,8 @@ public class TikaEntityProcessor extends EntityProcessorBase {
if(parser == null) {
parser = AUTO_PARSER;
}
+
+ spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
}
@Override
@@ -167,10 +175,20 @@ public class TikaEntityProcessor extends EntityProcessorBase {
if (s != null) row.put(col, s);
}
if(!"none".equals(format) ) row.put("text", sw.toString());
+ tryToAddLatLon(metadata, row);
done = true;
return row;
}
+ private void tryToAddLatLon(Metadata metadata, Map row) {
+ if (spatialMetadataField == null) return;
+ String latString = metadata.get(Metadata.LATITUDE);
+ String lonString = metadata.get(Metadata.LONGITUDE);
+ if (latString != null && lonString != null) {
+ row.put(spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString));
+ }
+ }
+
private static ContentHandler getHtmlHandler(Writer writer)
throws TransformerConfigurationException {
SAXTransformerFactory factory = (SAXTransformerFactory)
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
index 1ebb9d9d953..2d9fa64d618 100644
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
+++ b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
@@ -77,6 +77,8 @@
+
+
text
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg
new file mode 100644
index 00000000000..10d1ebb2d32
Binary files /dev/null and b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
index 3d4b1ab2d86..fe769bc8c87 100644
--- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
+++ b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
@@ -51,6 +51,18 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
" " +
"";
+ private String spatialConf =
+ "" +
+ " " +
+ " " +
+ " " +
+ " " +
+ " " +
+ " " +
+ "";
+
+
private String[] tests = {
"//*[@numFound='1']"
,"//str[@name='author'][.='Grant Ingersoll']"
@@ -74,6 +86,10 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
, "//str[@name='text'][contains(.,'class=\"classAttribute\"')]" //attributes are lower-cased
};
+ private String[] testsSpatial = {
+ "//*[@numFound='1']"
+ };
+
private String[] testsEmbedded = {
"//*[@numFound='1']",
"//str[@name='text'][contains(.,'When in the Course')]"
@@ -121,6 +137,16 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
assertQ(req("*:*"), testsHTMLIdentity);
}
+ @Test
+ public void testTikaGeoMetadata() throws Exception {
+ runFullImport(spatialConf);
+ String pt = "38.97,-77.018";
+ Double distance = 5.0d;
+ assertQ(req("q", "*:* OR foo_i:" + random().nextInt(100), "fq",
+ "{!geofilt sfield=\"home\"}\"",
+ "pt", pt, "d", String.valueOf(distance)), testsSpatial);
+ }
+
private String getConfigHTML(String htmlMapper) {
return
"" +