diff --git a/contrib/dataimporthandler/CHANGES.txt b/contrib/dataimporthandler/CHANGES.txt index 9f0e04b3499..97eebb73aa1 100644 --- a/contrib/dataimporthandler/CHANGES.txt +++ b/contrib/dataimporthandler/CHANGES.txt @@ -109,6 +109,9 @@ Bug Fixes 14. SOLR-999: XPathRecordReader fails on XMLs with nodes mixed with CDATA content. (Fergus McMenemie, Noble Paul via shalin) +15.SOLR-1000: FileListEntityProcessor should not apply fileName filter to directory names. + (Fergus McMenemie via shalin) + Documentation ---------------------- diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java index a0874e9c37e..3a72fd85b3b 100644 --- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java +++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java @@ -85,11 +85,10 @@ public class FileListEntityProcessor extends EntityProcessorBase { if (r != null) recursive = Boolean.parseBoolean(r); excludes = context.getEntityAttribute(EXCLUDES); - if (excludes != null) + if (excludes != null) { excludes = resolver.replaceTokens(excludes); - if (excludes != null) excludesPattern = Pattern.compile(excludes); - + } } private Date getDate(String dateStr) { @@ -148,20 +147,23 @@ public class FileListEntityProcessor extends EntityProcessorBase { } } - private void getFolderFiles(File dir, - final List> fileDetails) { + private void getFolderFiles(File dir, final List> fileDetails) { + // Fetch an array of file objects that pass the filter, however the + // returned array is never populated; accept() always returns false. + // Rather we make use of the fileDetails array which is populated as + // a side affect of the accept method. dir.list(new FilenameFilter() { public boolean accept(File dir, String name) { - if (fileNamePattern == null) { + File fileObj = new File(dir, name); + if (fileObj.isDirectory()) { + if (recursive) getFolderFiles(fileObj, fileDetails); + } else if (fileNamePattern == null) { addDetails(fileDetails, dir, name); - return false; - } - if (fileNamePattern.matcher(name).find()) { + } else if (fileNamePattern.matcher(name).find()) { if (excludesPattern != null && excludesPattern.matcher(name).find()) return false; addDetails(fileDetails, dir, name); } - return false; } }); @@ -170,12 +172,7 @@ public class FileListEntityProcessor extends EntityProcessorBase { private void addDetails(List> files, File dir, String name) { Map details = new HashMap(); File aFile = new File(dir, name); - if (aFile.isDirectory()) { - if (!recursive) - return; - getFolderFiles(aFile, files); - return; - } + if (aFile.isDirectory()) return; long sz = aFile.length(); Date lastModified = new Date(aFile.lastModified()); if (biggerThan != -1 && sz <= biggerThan) diff --git a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java index 8387fda706c..f2da214ffaa 100644 --- a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java +++ b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java @@ -91,7 +91,7 @@ public class TestFileListEntityProcessor { System.out.println("List of files when given OLDER_THAN -- " + fList); Assert.assertEquals(2, fList.size()); attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, "xml$", + FileListEntityProcessor.FILE_NAME, ".xml$", FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), FileListEntityProcessor.NEWER_THAN, "'NOW-2HOURS'"); c = AbstractDataImportHandlerTest.getContext(null, @@ -108,6 +108,39 @@ public class TestFileListEntityProcessor { Assert.assertEquals(2, fList.size()); } + @Test + public void testRECURSION() throws IOException { + long time = System.currentTimeMillis(); + File tmpdir = new File("." + time); + tmpdir.mkdir(); + tmpdir.deleteOnExit(); + File childdir = new File(tmpdir + "/child" ); + childdir.mkdirs(); + childdir.deleteOnExit(); + createFile(childdir, "a.xml", "a.xml".getBytes(), true); + createFile(childdir, "b.xml", "b.xml".getBytes(), true); + createFile(childdir, "c.props", "c.props".getBytes(), true); + Map attrs = AbstractDataImportHandlerTest.createMap( + FileListEntityProcessor.FILE_NAME, "^.*\\.xml$", + FileListEntityProcessor.BASE_DIR, childdir.getAbsolutePath(), + FileListEntityProcessor.RECURSIVE, "true"); + Context c = AbstractDataImportHandlerTest.getContext(null, + new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs); + FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor(); + fileListEntityProcessor.init(c); + List fList = new ArrayList(); + while (true) { + // Add the documents to the index. NextRow() should only + // find two filesnames that match the pattern in fileName + Map f = fileListEntityProcessor.nextRow(); + if (f == null) + break; + fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE)); + } + System.out.println("List of files indexed -- " + fList); + Assert.assertEquals(2, fList.size()); + } + public static File createFile(File tmpdir, String name, byte[] content, boolean changeModifiedTime) throws IOException { File file = new File(tmpdir.getAbsolutePath() + File.separator + name);