Use the HTMLParser constructor that takes a FileInputStream and make sure the stream gets closed. The constructor that takes a File opens its own FileInputStream, which did not get closed.

Thus I deprecated the constructor that takes a File. I guess the demo isn't part of the "official" API, but there are surely people who use it for more than just testing.

PR: 28187
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150395 13f79535-47bb-0310-9956-ffa450edef68
2004-08-06 19:26:16 +00:00 · 2004-08-06 19:26:16 +00:00 · 35d5406541
parent fecb54ff64
commit 35d5406541
4 changed files with 45 additions and 25 deletions
--- a/src/demo/org/apache/lucene/demo/HTMLDocument.java
+++ b/src/demo/org/apache/lucene/demo/HTMLDocument.java
@@ -61,19 +61,26 @@ public class HTMLDocument {
    // tokenized prior to indexing.
    doc.add(new Field("uid", uid(f), false, true, false));

-    HTMLParser parser = new HTMLParser(f);
+    FileInputStream fis = null;
+    try {
+      fis = new FileInputStream(f);
+      HTMLParser parser = new HTMLParser(fis);
+      
+      // Add the tag-stripped contents as a Reader-valued Text field so it will
+      // get tokenized and indexed.
+      doc.add(Field.Text("contents", parser.getReader()));

-    // Add the tag-stripped contents as a Reader-valued Text field so it will
-    // get tokenized and indexed.
-    doc.add(Field.Text("contents", parser.getReader()));
+      // Add the summary as an UnIndexed field, so that it is stored and returned
+      // with hit documents for display.
+      doc.add(Field.UnIndexed("summary", parser.getSummary()));

-    // Add the summary as an UnIndexed field, so that it is stored and returned
-    // with hit documents for display.
-    doc.add(Field.UnIndexed("summary", parser.getSummary()));
-
-    // Add the title as a separate Text field, so that it can be searched
-    // separately.
-    doc.add(Field.Text("title", parser.getTitle()));
+      // Add the title as a separate Text field, so that it can be searched
+      // separately.
+      doc.add(Field.Text("title", parser.getTitle()));
+    } finally {
+      if (fis != null)
+        fis.close();
+    }

    // return the document
    return doc;
--- a/src/demo/org/apache/lucene/demo/html/HTMLParser.java
+++ b/src/demo/org/apache/lucene/demo/html/HTMLParser.java
@@ -40,6 +40,9 @@ public class HTMLParser implements HTMLParserConstants {
    }
  }

+  /**
+   * @deprecated Use HTMLParser(FileInputStream) instead
+   */
  public HTMLParser(File file) throws FileNotFoundException {
    this(new FileInputStream(file));
  }
@@ -450,18 +453,18 @@ null)
    finally { jj_save(1, xla); }
  }

-  final private boolean jj_3_1() {
-    if (jj_scan_token(ArgQuote1)) return true;
-    if (jj_scan_token(CloseQuote1)) return true;
-    return false;
-  }
-
  final private boolean jj_3_2() {
    if (jj_scan_token(ArgQuote2)) return true;
    if (jj_scan_token(CloseQuote2)) return true;
    return false;
  }

+  final private boolean jj_3_1() {
+    if (jj_scan_token(ArgQuote1)) return true;
+    if (jj_scan_token(CloseQuote1)) return true;
+    return false;
+  }
+
  public HTMLParserTokenManager token_source;
  SimpleCharStream jj_input_stream;
  public Token token, jj_nt;
--- a/src/demo/org/apache/lucene/demo/html/HTMLParser.jj
+++ b/src/demo/org/apache/lucene/demo/html/HTMLParser.jj
@@ -104,6 +104,9 @@ public class HTMLParser {
    }
  }

+  /**
+   * @deprecated Use HTMLParser(FileInputStream) instead
+   */
  public HTMLParser(File file) throws FileNotFoundException {
    this(new FileInputStream(file));
  }
--- a/src/demo/org/apache/lucene/demo/html/Test.java
+++ b/src/demo/org/apache/lucene/demo/html/Test.java
@@ -19,7 +19,7 @@ package org.apache.lucene.demo.html;
 import java.io.*;

 class Test {
-  public static void main(String[] argv) throws Exception {
+  public static void main(String[] argv) throws IOException, InterruptedException {
    if ("-dir".equals(argv[0])) {
      String[] files = new File(argv[1]).list();
      java.util.Arrays.sort(files);
@@ -32,12 +32,19 @@ class Test {
      parse(new File(argv[0]));
  }

-  public static void parse(File file) throws Exception {
-    HTMLParser parser = new HTMLParser(file);
-    System.out.println("Title: " + Entities.encode(parser.getTitle()));
-    System.out.println("Summary: " + Entities.encode(parser.getSummary()));
-    LineNumberReader reader = new LineNumberReader(parser.getReader());
-    for (String l = reader.readLine(); l != null; l = reader.readLine())
-      System.out.println(l);
+  public static void parse(File file) throws IOException, InterruptedException {
+    FileInputStream fis = null;
+    try {
+      fis = new FileInputStream(file);
+      HTMLParser parser = new HTMLParser(fis);
+      System.out.println("Title: " + Entities.encode(parser.getTitle()));
+      System.out.println("Summary: " + Entities.encode(parser.getSummary()));
+      System.out.println("Content:");
+      LineNumberReader reader = new LineNumberReader(parser.getReader());
+      for (String l = reader.readLine(); l != null; l = reader.readLine())
+        System.out.println(l);
+    } finally {
+      if (fis != null) fis.close();
+    }
  }
 }