mirror of https://github.com/apache/lucene.git
LUCENE-591: index meta keywords in contrib/demo
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1031474 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c54ea4da67
commit
fe82745769
|
@ -143,6 +143,9 @@ Bug fixes
|
|||
|
||||
* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
|
||||
(Curtis d'Entremont via Robert Muir)
|
||||
|
||||
* LUCENE-591: The demo indexer now indexes meta keywords.
|
||||
(Curtis d'Entremont via Robert Muir)
|
||||
|
||||
API Changes
|
||||
|
||||
|
|
|
@ -70,6 +70,11 @@ public class HTMLDocument {
|
|||
// Add the tag-stripped contents as a Reader-valued Text field so it will
|
||||
// get tokenized and indexed.
|
||||
doc.add(new Field("contents", parser.getReader()));
|
||||
|
||||
// add any document keywords if they exist
|
||||
String keywords = parser.getMetaTags().getProperty("keywords");
|
||||
if (keywords != null)
|
||||
doc.add(new Field("contents", keywords, Field.Store.NO, Field.Index.ANALYZED));
|
||||
|
||||
// Add the summary as a field that is stored and returned with
|
||||
// hit documents for display.
|
||||
|
|
|
@ -43,4 +43,24 @@ public class TestDemo extends LuceneTestCase {
|
|||
System.setOut(outSave);
|
||||
}
|
||||
}
|
||||
|
||||
// LUCENE-591
// Runs IndexHTML over the "test-files/html" data directory, then runs
// SearchFiles with the queries from "test-files/queries2.txt" while
// System.out is redirected into an in-memory buffer, and asserts that the
// captured output contains "1 total matching documents".
// NOTE(review): presumably this verifies that meta keywords end up in the
// searchable index — confirm against the fixture and query files.
|
||||
public void testIndexKeywords() throws Exception {
|
||||
File dir = getDataFile("test-files/html"); // input directory handed to the indexer
|
||||
File indexDir = new File(TEMP_DIR, "demoIndex2"); // index location under TEMP_DIR
|
||||
IndexHTML.main(new String[] { "-create", "-index", indexDir.getPath(), dir.getPath() });
|
||||
File queries = getDataFile("test-files/queries2.txt"); // query file passed to SearchFiles below
|
||||
PrintStream outSave = System.out; // remember the real stdout so it can be restored
|
||||
try {
|
||||
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
|
||||
PrintStream fakeSystemOut = new PrintStream(bytes);
|
||||
System.setOut(fakeSystemOut); // capture everything SearchFiles prints
|
||||
SearchFiles.main(new String[] { "-index", indexDir.getPath(), "-queries", queries.getPath()});
|
||||
fakeSystemOut.flush(); // make sure buffered output reaches 'bytes' before reading it
|
||||
String output = bytes.toString(); // intentionally use default encoding
|
||||
assertTrue(output.contains("1 total matching documents"));
|
||||
} finally {
|
||||
System.setOut(outSave); // always restore stdout, even if the search or assertion fails
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||
<meta name="keywords" content="dogs,fish" />
|
||||
</head>
|
||||
<body>
|
||||
This document is actually not about cats!
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1 @@
|
|||
+contents:dogs +contents:fish
|
Loading…
Reference in New Issue