SOLR-1865: ignore BOMs in SolrResourceLoader.getLines

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@942288 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-05-08 00:42:04 +00:00
parent 399d373089
commit f3c25f02d5
4 changed files with 38 additions and 1 deletions

View File

@ -348,6 +348,10 @@ Other Changes
* SOLR-1851: luceneAutoCommit no longer has any effect - it has been removed (Mark Miller) * SOLR-1851: luceneAutoCommit no longer has any effect - it has been removed (Mark Miller)
* SOLR-1865: SolrResourceLoader.getLines ignores Byte Order Marks (BOMs) at the
beginning of input files; these are often created by editors such as Windows
Notepad. (rmuir, hossman)
Build Build
---------------------- ----------------------

View File

@ -309,6 +309,9 @@ public class SolrResourceLoader implements ResourceLoader
lines = new ArrayList<String>(); lines = new ArrayList<String>();
for (String word=null; (word=input.readLine())!=null;) { for (String word=null; (word=input.readLine())!=null;) {
// skip initial bom marker
if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
word = word.substring(1);
// skip comments // skip comments
if (word.startsWith("#")) continue; if (word.startsWith("#")) continue;
word=word.trim(); word=word.trim();

View File

@ -30,6 +30,9 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
import java.io.File; import java.io.File;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
public class ResourceLoaderTest extends TestCase public class ResourceLoaderTest extends TestCase
{ {
@ -88,4 +91,30 @@ public class ResourceLoaderTest extends TestCase
catch( SolrException ex ) { } // OK catch( SolrException ex ) { } // OK
} }
} }
/**
 * Checks that {@code getLines} strips a leading byte order mark from a resource.
 *
 * <p>The test first reads the raw bytes of the fixture to confirm it still
 * starts with the three-byte sequence EF BB BF, so that a fixture which has
 * lost its BOM fails loudly instead of letting the real assertion pass
 * vacuously. It then loads the same resource through {@code getLines} and
 * expects exactly one line with no BOM character in front of it.
 *
 * @throws Exception if the resource cannot be opened or read
 */
public void testBOMMarkers() throws Exception {
  final String fileWithBom = "stopwithbom.txt";
  SolrResourceLoader loader = new SolrResourceLoader(null);

  // Guard: the fixture must really begin with the BOM bytes (EF BB BF).
  final byte[] expectedPrefix = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
  final byte[] actualPrefix = new byte[expectedPrefix.length];
  InputStream rawStream = loader.openResource(fileWithBom);
  try {
    final int bytesRead = rawStream.read(actualPrefix);
    assertEquals("Should have been able to read 3 bytes from bomStream",
                 3, bytesRead);
    final boolean startsWithBom = Arrays.equals(expectedPrefix, actualPrefix);
    assertTrue("This test only works if " + fileWithBom +
               " contains a BOM -- it appears someone removed it.",
               startsWithBom);
  } finally {
    // A failure to close must not mask the outcome of the assertions above.
    try {
      rawStream.close();
    } catch (Exception e) {
      /* IGNORE */
    }
  }

  // The actual check: the only line comes back without the BOM prefix.
  List<String> parsedLines = loader.getLines(fileWithBom);
  assertEquals(1, parsedLines.size());
  assertEquals("BOMsAreEvil", parsedLines.get(0));
}
} }

View File

@ -0,0 +1 @@
BOMsAreEvil