mirror of https://github.com/apache/lucene.git
SOLR-1865: ignore BOMs in SolrResourceLoader.getLines
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@942288 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
399d373089
commit
f3c25f02d5
|
@ -347,6 +347,10 @@ Other Changes
|
|||
(Chris Male via rmuir)
|
||||
|
||||
* SOLR-1851: luceneAutoCommit no longer has any effect - it has been removed (Mark Miller)
|
||||
|
||||
* SOLR-1865: SolrResourceLoader.getLines ignores Byte Order Marks (BOMs) at the
|
||||
beginning of input files; these are often created by editors such as Windows
|
||||
Notepad. (rmuir, hossman)
|
||||
|
||||
|
||||
Build
|
||||
|
|
|
@ -309,6 +309,9 @@ public class SolrResourceLoader implements ResourceLoader
|
|||
|
||||
lines = new ArrayList<String>();
|
||||
for (String word=null; (word=input.readLine())!=null;) {
|
||||
// skip initial bom marker
|
||||
if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
|
||||
word = word.substring(1);
|
||||
// skip comments
|
||||
if (word.startsWith("#")) continue;
|
||||
word=word.trim();
|
||||
|
|
|
@ -30,6 +30,9 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
|||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class ResourceLoaderTest extends TestCase
|
||||
{
|
||||
|
@ -88,4 +91,30 @@ public class ResourceLoaderTest extends TestCase
|
|||
catch( SolrException ex ) { } // OK
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that {@code SolrResourceLoader.getLines} does not return a leading
 * byte order mark as part of the first line.
 *
 * The test first confirms that the fixture file still begins with the three
 * expected BOM bytes, then asserts that {@code getLines} yields exactly one
 * line, "BOMsAreEvil".
 *
 * @throws Exception if the resource cannot be opened or read
 */
public void testBOMMarkers() throws Exception {
|
||||
final String fileWithBom = "stopwithbom.txt";
|
||||
SolrResourceLoader loader = new SolrResourceLoader(null);
|
||||
|
||||
// preliminary sanity check: the fixture must really start with a BOM,
// otherwise the getLines assertions below would pass without proving anything
|
||||
InputStream bomStream = loader.openResource(fileWithBom);
|
||||
try {
|
||||
// the UTF-8 byte order mark 0xEF 0xBB 0xBF, written as signed Java bytes
final byte[] bomExpected = new byte[] { -17, -69, -65 };
|
||||
final byte[] firstBytes = new byte[3];
|
||||
|
||||
// a single read call is expected to fill the whole 3-byte buffer
assertEquals("Should have been able to read 3 bytes from bomStream",
|
||||
3, bomStream.read(firstBytes));
|
||||
|
||||
assertTrue("This test only works if " + fileWithBom +
|
||||
" contains a BOM -- it appears someone removed it.",
|
||||
Arrays.equals(bomExpected, firstBytes));
|
||||
} finally {
|
||||
// best-effort close: a failure here is deliberately not allowed to fail the test
try { bomStream.close(); } catch (Exception e) { /* IGNORE */ }
|
||||
}
|
||||
|
||||
// now make sure getLines skips the BOM: the only line must come back
// without any leading BOM character
|
||||
List<String> lines = loader.getLines(fileWithBom);
|
||||
assertEquals(1, lines.size());
|
||||
assertEquals("BOMsAreEvil", lines.get(0));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
BOMsAreEvil
|
Loading…
Reference in New Issue