diff --git a/src/demo/org/apache/lucene/demo/html/HTMLParser.java b/src/demo/org/apache/lucene/demo/html/HTMLParser.java index 5179476b60e..154c6a7db35 100644 --- a/src/demo/org/apache/lucene/demo/html/HTMLParser.java +++ b/src/demo/org/apache/lucene/demo/html/HTMLParser.java @@ -10,7 +10,8 @@ public class HTMLParser implements HTMLParserConstants { StringBuffer title = new StringBuffer(SUMMARY_LENGTH); StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2); Properties metaTags=new Properties(); - String currentMetaTag=""; + String currentMetaTag=null; + String currentMetaContent=null; int length = 0; boolean titleComplete = false; boolean inTitle = false; @@ -120,11 +121,6 @@ InterruptedException { void addText(String text) throws IOException { if (inStyle) return; - if (inMetaTag) - { - metaTags.setProperty(currentMetaTag, text); - return; - } if (inTitle) title.append(text); else { @@ -143,6 +139,13 @@ InterruptedException { afterSpace = false; } + void addMetaTag() throws IOException { + metaTags.setProperty(currentMetaTag, currentMetaContent); + currentMetaTag = null; + currentMetaContent = null; + return; + } + void addSpace() throws IOException { if (!afterSpace) { if (inTitle) @@ -262,11 +265,17 @@ InterruptedException { && t2 != null) { currentMetaTag=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } } if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != null) { - addText(t2.image); + currentMetaContent=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } } break; default: @@ -441,18 +450,18 @@ null) finally { jj_save(1, xla); } } - final private boolean jj_3_2() { - if (jj_scan_token(ArgQuote2)) return true; - if (jj_scan_token(CloseQuote2)) return true; - return false; - } - final private boolean jj_3_1() { if (jj_scan_token(ArgQuote1)) return true; if (jj_scan_token(CloseQuote1)) return true; return false; } + final private boolean jj_3_2() { + if (jj_scan_token(ArgQuote2)) return true; + if (jj_scan_token(CloseQuote2)) return true; + return false; + } + public HTMLParserTokenManager token_source; SimpleCharStream jj_input_stream; public Token token, jj_nt; diff --git a/src/demo/org/apache/lucene/demo/html/HTMLParser.jj b/src/demo/org/apache/lucene/demo/html/HTMLParser.jj index 5dd81eb4f08..b394599f75a 100644 --- a/src/demo/org/apache/lucene/demo/html/HTMLParser.jj +++ b/src/demo/org/apache/lucene/demo/html/HTMLParser.jj @@ -74,7 +74,8 @@ public class HTMLParser { StringBuffer title = new StringBuffer(SUMMARY_LENGTH); StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2); Properties metaTags=new Properties(); - String currentMetaTag=""; + String currentMetaTag=null; + String currentMetaContent=null; int length = 0; boolean titleComplete = false; boolean inTitle = false; @@ -184,11 +185,6 @@ InterruptedException { void addText(String text) throws IOException { if (inStyle) return; - if (inMetaTag) - { - metaTags.setProperty(currentMetaTag, text); - return; - } if (inTitle) title.append(text); else { @@ -206,6 +202,13 @@ InterruptedException { afterSpace = false; } + + void addMetaTag() throws IOException { + metaTags.setProperty(currentMetaTag, currentMetaContent); + currentMetaTag = null; + currentMetaContent = null; + return; + } void addSpace() throws IOException { if (!afterSpace) { @@ -284,11 +287,17 @@ void Tag() throws IOException : && t2 != null) { currentMetaTag=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } } if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != null) { - addText(t2.image); + currentMetaContent=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } } } )?