Don't call skip() on the Reader of the huge linedocs file; skip on the underlying InputStream instead

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1336904 13f79535-47bb-0310-9956-ffa450edef68
2012-05-10 21:26:13 +00:00 · 2012-05-10 21:26:13 +00:00 · a41f0ca63b
parent 54a5e34d35
commit a41f0ca63b
1 changed files with 8 additions and 3 deletions
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
@@ -24,6 +24,9 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.GZIPInputStream;
@@ -88,8 +91,6 @@ public class LineFileDocs implements Closeable {
      size *= 2.8;
    }

-    reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
-
    // Override sizes for currently "known" line files:
    if (path.equals("europarl.lines.txt.gz")) {
      size = 15129506L;
@@ -103,7 +104,11 @@ public class LineFileDocs implements Closeable {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open");
      }
-      reader.skip(seekTo);
+      is.skip(seekTo);
+      CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+          .onMalformedInput(CodingErrorAction.IGNORE)
+          .onUnmappableCharacter(CodingErrorAction.IGNORE);
+      reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);
      reader.readLine();
    }
  }