mirror of https://github.com/apache/lucene.git
SOLR-1865: ignore BOMs in SolrResourceLoader.getLines
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@942288 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
399d373089
commit
f3c25f02d5
|
@ -348,6 +348,10 @@ Other Changes
|
||||||
|
|
||||||
* SOLR-1851: luceneAutoCommit no longer has any effect - it has been removed (Mark Miller)
|
* SOLR-1851: luceneAutoCommit no longer has any effect - it has been removed (Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-1865: SolrResourceLoader.getLines ignores Byte Order Marks (BOMs) at the
|
||||||
|
beginning of input files; these are often created by editors such as Windows
|
||||||
|
Notepad. (rmuir, hossman)
|
||||||
|
|
||||||
|
|
||||||
Build
|
Build
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -309,6 +309,9 @@ public class SolrResourceLoader implements ResourceLoader
|
||||||
|
|
||||||
lines = new ArrayList<String>();
|
lines = new ArrayList<String>();
|
||||||
for (String word=null; (word=input.readLine())!=null;) {
|
for (String word=null; (word=input.readLine())!=null;) {
|
||||||
|
// skip initial bom marker
|
||||||
|
if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
|
||||||
|
word = word.substring(1);
|
||||||
// skip comments
|
// skip comments
|
||||||
if (word.startsWith("#")) continue;
|
if (word.startsWith("#")) continue;
|
||||||
word=word.trim();
|
word=word.trim();
|
||||||
|
|
|
@ -30,6 +30,9 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class ResourceLoaderTest extends TestCase
|
public class ResourceLoaderTest extends TestCase
|
||||||
{
|
{
|
||||||
|
@ -88,4 +91,30 @@ public class ResourceLoaderTest extends TestCase
|
||||||
catch( SolrException ex ) { } // OK
|
catch( SolrException ex ) { } // OK
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBOMMarkers() throws Exception {
|
||||||
|
final String fileWithBom = "stopwithbom.txt";
|
||||||
|
SolrResourceLoader loader = new SolrResourceLoader(null);
|
||||||
|
|
||||||
|
// preliminary sanity check
|
||||||
|
InputStream bomStream = loader.openResource(fileWithBom);
|
||||||
|
try {
|
||||||
|
final byte[] bomExpected = new byte[] { -17, -69, -65 };
|
||||||
|
final byte[] firstBytes = new byte[3];
|
||||||
|
|
||||||
|
assertEquals("Should have been able to read 3 bytes from bomStream",
|
||||||
|
3, bomStream.read(firstBytes));
|
||||||
|
|
||||||
|
assertTrue("This test only works if " + fileWithBom +
|
||||||
|
" contains a BOM -- it appears someone removed it.",
|
||||||
|
Arrays.equals(bomExpected, firstBytes));
|
||||||
|
} finally {
|
||||||
|
try { bomStream.close(); } catch (Exception e) { /* IGNORE */ }
|
||||||
|
}
|
||||||
|
|
||||||
|
// now make sure getLines skips the BOM...
|
||||||
|
List<String> lines = loader.getLines(fileWithBom);
|
||||||
|
assertEquals(1, lines.size());
|
||||||
|
assertEquals("BOMsAreEvil", lines.get(0));
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
BOMsAreEvil
|
Loading…
Reference in New Issue