Use the HTMLParser constructor that takes a FileInputStream and make sure the stream gets closed. That was not possible with the constructor that takes a File, because it opens a FileInputStream internally that the caller cannot close.

I therefore deprecated the constructor that takes a File. The demo probably isn't part of the "official" API, but there are surely people who use it for more than just testing.
PR: 28187


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150395 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Daniel Naber 2004-08-06 19:26:16 +00:00
parent fecb54ff64
commit 35d5406541
4 changed files with 45 additions and 25 deletions

View File

@ -61,19 +61,26 @@ public class HTMLDocument {
// tokenized prior to indexing. // tokenized prior to indexing.
doc.add(new Field("uid", uid(f), false, true, false)); doc.add(new Field("uid", uid(f), false, true, false));
HTMLParser parser = new HTMLParser(f); FileInputStream fis = null;
try {
fis = new FileInputStream(f);
HTMLParser parser = new HTMLParser(fis);
// Add the tag-stripped contents as a Reader-valued Text field so it will
// get tokenized and indexed.
doc.add(Field.Text("contents", parser.getReader()));
// Add the tag-stripped contents as a Reader-valued Text field so it will // Add the summary as an UnIndexed field, so that it is stored and returned
// get tokenized and indexed. // with hit documents for display.
doc.add(Field.Text("contents", parser.getReader())); doc.add(Field.UnIndexed("summary", parser.getSummary()));
// Add the summary as an UnIndexed field, so that it is stored and returned // Add the title as a separate Text field, so that it can be searched
// with hit documents for display. // separately.
doc.add(Field.UnIndexed("summary", parser.getSummary())); doc.add(Field.Text("title", parser.getTitle()));
} finally {
// Add the title as a separate Text field, so that it can be searched if (fis != null)
// separately. fis.close();
doc.add(Field.Text("title", parser.getTitle())); }
// return the document // return the document
return doc; return doc;

View File

@ -40,6 +40,9 @@ public class HTMLParser implements HTMLParserConstants {
} }
} }
/**
* @deprecated Use HTMLParser(FileInputStream) instead
*/
public HTMLParser(File file) throws FileNotFoundException { public HTMLParser(File file) throws FileNotFoundException {
this(new FileInputStream(file)); this(new FileInputStream(file));
} }
@ -450,18 +453,18 @@ null)
finally { jj_save(1, xla); } finally { jj_save(1, xla); }
} }
final private boolean jj_3_1() {
if (jj_scan_token(ArgQuote1)) return true;
if (jj_scan_token(CloseQuote1)) return true;
return false;
}
final private boolean jj_3_2() { final private boolean jj_3_2() {
if (jj_scan_token(ArgQuote2)) return true; if (jj_scan_token(ArgQuote2)) return true;
if (jj_scan_token(CloseQuote2)) return true; if (jj_scan_token(CloseQuote2)) return true;
return false; return false;
} }
final private boolean jj_3_1() {
if (jj_scan_token(ArgQuote1)) return true;
if (jj_scan_token(CloseQuote1)) return true;
return false;
}
public HTMLParserTokenManager token_source; public HTMLParserTokenManager token_source;
SimpleCharStream jj_input_stream; SimpleCharStream jj_input_stream;
public Token token, jj_nt; public Token token, jj_nt;

View File

@ -104,6 +104,9 @@ public class HTMLParser {
} }
} }
/**
* @deprecated Use HTMLParser(FileInputStream) instead
*/
public HTMLParser(File file) throws FileNotFoundException { public HTMLParser(File file) throws FileNotFoundException {
this(new FileInputStream(file)); this(new FileInputStream(file));
} }

View File

@ -19,7 +19,7 @@ package org.apache.lucene.demo.html;
import java.io.*; import java.io.*;
class Test { class Test {
public static void main(String[] argv) throws Exception { public static void main(String[] argv) throws IOException, InterruptedException {
if ("-dir".equals(argv[0])) { if ("-dir".equals(argv[0])) {
String[] files = new File(argv[1]).list(); String[] files = new File(argv[1]).list();
java.util.Arrays.sort(files); java.util.Arrays.sort(files);
@ -32,12 +32,19 @@ class Test {
parse(new File(argv[0])); parse(new File(argv[0]));
} }
public static void parse(File file) throws Exception { public static void parse(File file) throws IOException, InterruptedException {
HTMLParser parser = new HTMLParser(file); FileInputStream fis = null;
System.out.println("Title: " + Entities.encode(parser.getTitle())); try {
System.out.println("Summary: " + Entities.encode(parser.getSummary())); fis = new FileInputStream(file);
LineNumberReader reader = new LineNumberReader(parser.getReader()); HTMLParser parser = new HTMLParser(fis);
for (String l = reader.readLine(); l != null; l = reader.readLine()) System.out.println("Title: " + Entities.encode(parser.getTitle()));
System.out.println(l); System.out.println("Summary: " + Entities.encode(parser.getSummary()));
System.out.println("Content:");
LineNumberReader reader = new LineNumberReader(parser.getReader());
for (String l = reader.readLine(); l != null; l = reader.readLine())
System.out.println(l);
} finally {
if (fis != null) fis.close();
}
} }
} }