SOLR-7546: bin/post (and SimplePostTool in -Dauto=yes mode) now sends rather than skips files without a known content type, as application/octet-stream, provided it still is in the allowed filetypes setting

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2015-05-18 15:51:44 +00:00
parent 9e4a25adf7
commit 64274166b8
3 changed files with 19 additions and 19 deletions

View File

@ -195,6 +195,9 @@ New Features
Example: json.facet={ numProducts : "hll(product_id)" }
(yonik)
* SOLR-7546: bin/post (and SimplePostTool in -Dauto=yes mode) now sends rather than skips files
without a known content type, as "application/octet-stream", provided it still is in the
allowed filetypes setting. (ehatcher)
Bug Fixes
----------------------

View File

@ -360,8 +360,6 @@ public class SimplePostTool {
}
private void reset() {
fileTypes = DEFAULT_FILE_TYPES;
globFileFilter = this.getFileFilterFromFileTypes(fileTypes);
backlog = new ArrayList<>();
visited = new HashSet<>();
}
@ -774,22 +772,19 @@ public class SimplePostTool {
if(type == null) {
type = guessType(file);
}
if(type != null) {
if(type.equals("application/xml") || type.equals("text/csv") || type.equals("application/json")) {
// Default handler
} else {
// SolrCell
suffix = "/extract";
String urlStr = appendUrlPath(solrUrl, suffix).toString();
if(urlStr.indexOf("resource.name")==-1)
urlStr = appendParam(urlStr, "resource.name=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8"));
if(urlStr.indexOf("literal.id")==-1)
urlStr = appendParam(urlStr, "literal.id=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8"));
url = new URL(urlStr);
}
// TODO: Add a flag that disables /update and sends all to /update/extract, to avoid CSV, JSON, and XML files
// TODO: from being interpreted as Solr documents internally
if(type.equals("application/xml") || type.equals("text/csv") || type.equals("application/json")) {
// Default handler
} else {
warn("Skipping "+file.getName()+". Unsupported file type for auto mode.");
return;
// SolrCell
suffix = "/extract";
String urlStr = appendUrlPath(solrUrl, suffix).toString();
if(urlStr.indexOf("resource.name")==-1)
urlStr = appendParam(urlStr, "resource.name=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8"));
if(urlStr.indexOf("literal.id")==-1)
urlStr = appendParam(urlStr, "literal.id=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8"));
url = new URL(urlStr);
}
} else {
if(type == null) type = DEFAULT_CONTENT_TYPE;
@ -821,13 +816,15 @@ public class SimplePostTool {
/**
* Guesses the type of a file, based on file name suffix
* Returns "application/octet-stream" if no corresponding mimeMap type.
* @param file the file
* @return the content-type guessed
*/
protected static String guessType(File file) {
String name = file.getName();
String suffix = name.substring(name.lastIndexOf(".")+1);
return mimeMap.get(suffix.toLowerCase(Locale.ROOT));
String type = mimeMap.get(suffix.toLowerCase(Locale.ROOT));
return (type != null) ? type : "application/octet-stream";
}
/**

View File

@ -145,7 +145,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
File f = new File("foo.doc");
assertEquals("application/msword", SimplePostTool.guessType(f));
f = new File("foobar");
assertEquals(null, SimplePostTool.guessType(f));
assertEquals("application/octet-stream", SimplePostTool.guessType(f));
}
@Test