mirror of https://github.com/apache/lucene.git
PR:
Obtained from: Reviewed by: Doug Cutting / Lucene Community new demo build target added getting started guide modified tests moved demo to demo subpackage added war demo git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149646 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d0473acefc
commit
e120b8bd51
|
@ -52,14 +52,14 @@ releases are available for download at:
|
|||
Download either a zip or a tarred/gzipped version of the archive, and
|
||||
uncompress it into a directory of your choice.
|
||||
|
||||
Step 3) Connect to the top-level of your Lucene installation
|
||||
Step 2) Connect to the top-level of your Lucene installation
|
||||
|
||||
Lucene's top-level directory contains the build.properties and
|
||||
build.xml files. You don't need to change any of the settings in
|
||||
these files, but you do need to run ant from this location so it knows
|
||||
where to find them.
|
||||
|
||||
Step 4) Run ant.
|
||||
Step 3) Run ant.
|
||||
|
||||
Assuming you have ant in your PATH and have set ANT_HOME to the
|
||||
location of your ant installation, typing "ant" at the shell prompt
|
||||
|
|
|
@ -14,6 +14,7 @@ docs.dest = ./docs
|
|||
|
||||
src.dir = ./src/java
|
||||
demo.src = ./src/demo
|
||||
demo.jsp = ./src/jsp
|
||||
test.src = ./src/test
|
||||
docs.dir = ./docs
|
||||
lib.dir = ./lib
|
||||
|
@ -37,6 +38,8 @@ build.src = ${build.dir}/src
|
|||
build.demo = ${build.dir}/demo
|
||||
build.demo.src = ${build.demo}/src
|
||||
build.demo.classes = ${build.demo}/classes
|
||||
build.demo.name = ${name}-demos-${version}
|
||||
build.war.name = luceneweb
|
||||
|
||||
build.test = ${build.dir}/test
|
||||
build.test.src = ${build.test}/src
|
||||
|
|
48
build.xml
48
build.xml
|
@ -121,6 +121,45 @@
|
|||
/>
|
||||
</target>
|
||||
|
||||
<target name="jardemo" depends="compile,demo" if="javacc.present">
|
||||
<jar
|
||||
jarfile="${build.demo}/${build.demo.name}.jar"
|
||||
basedir="${build.demo.classes}"
|
||||
excludes="**/*.java"
|
||||
/>
|
||||
</target>
|
||||
|
||||
<target name="wardemo" depends="compile,demo,jar,jardemo" if="javacc.present">
|
||||
<mkdir dir="${build.demo}/${build.war.name}"/>
|
||||
<mkdir dir="${build.demo}/${build.war.name}/WEB-INF"/>
|
||||
<mkdir dir="${build.demo}/${build.war.name}/WEB-INF/lib"/>
|
||||
|
||||
<copy todir="${build.demo}/${build.war.name}">
|
||||
<fileset dir="${demo.jsp}">
|
||||
<include name="**/*.jsp"/>
|
||||
<include name="**/*.xml"/>
|
||||
</fileset>
|
||||
</copy>
|
||||
|
||||
<copy todir="${build.demo}/${build.war.name}/WEB-INF/lib">
|
||||
<fileset dir="${build.dir}">
|
||||
<include name="*.jar"/>
|
||||
</fileset>
|
||||
</copy>
|
||||
|
||||
<copy todir="${build.demo}/${build.war.name}/WEB-INF/lib">
|
||||
<fileset dir="${build.demo}">
|
||||
<include name="*.jar"/>
|
||||
</fileset>
|
||||
</copy>
|
||||
|
||||
<jar
|
||||
jarfile="${build.demo}/${build.war.name}.war"
|
||||
basedir="${build.demo}/${build.war.name}"
|
||||
excludes="**/*.java"
|
||||
/>
|
||||
</target>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- J A R S O U R C E -->
|
||||
<!-- ================================================================== -->
|
||||
|
@ -163,9 +202,9 @@
|
|||
</copy>
|
||||
|
||||
<javacc
|
||||
target="${build.demo.src}/org/apache/lucene/HTMLParser/HTMLParser.jj"
|
||||
target="${build.demo.src}/org/apache/lucene/demo/html/HTMLParser.jj"
|
||||
javacchome="${javacc.zip.dir}"
|
||||
outputdirectory="${build.demo.src}/org/apache/lucene/HTMLParser"
|
||||
outputdirectory="${build.demo.src}/org/apache/lucene/demo/html"
|
||||
/>
|
||||
|
||||
<mkdir dir="${build.demo.classes}"/>
|
||||
|
@ -321,7 +360,7 @@
|
|||
<!-- ================================================================== -->
|
||||
<!-- -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="package" depends="jar, javadocs, demo">
|
||||
<target name="package" depends="jar, javadocs, demo, wardemo">
|
||||
<mkdir dir="${dist.dir}"/>
|
||||
<mkdir dir="${dist.dir}/docs"/>
|
||||
<mkdir dir="${dist.dir}/docs/api"/>
|
||||
|
@ -339,6 +378,7 @@
|
|||
<fileset dir="${build.demo.classes}"/>
|
||||
</copy>
|
||||
|
||||
|
||||
<copy todir="${dist.dir}/src">
|
||||
<fileset dir="src"/>
|
||||
</copy>
|
||||
|
@ -353,6 +393,8 @@
|
|||
</fileset>
|
||||
</copy>
|
||||
<copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
|
||||
<copy file="${build.demo}/${build.demo.name}.jar" todir="${dist.dir}"/>
|
||||
<copy file="${build.demo}/${build.war.name}.war" todir="${dist.dir}"/>
|
||||
</target>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
|
|
|
@ -6,12 +6,13 @@
|
|||
javax.servlet.*
|
||||
javax.servlet.http.*
|
||||
java.io.*
|
||||
com.lucene.analysis.*
|
||||
com.lucene.document.*
|
||||
com.lucene.index.*
|
||||
com.lucene.search.*
|
||||
com.lucene.queryParser.*
|
||||
demo.HTMLParser.Entities
|
||||
org.apache.lucene.analysis.*
|
||||
org.apache.lucene.document.*
|
||||
org.apache.lucene.index.*
|
||||
org.apache.lucene.search.*
|
||||
org.apache.lucene.queryParser.*
|
||||
org.apache.lucene.demo.*
|
||||
org.apache.lucene.demo.html.Entities
|
||||
</java>
|
||||
|
||||
<java>
|
|
@ -1,7 +0,0 @@
|
|||
HTMLParser.java
|
||||
HTMLParserTokenManager.java
|
||||
TokenMgrError.java
|
||||
ParseException.java
|
||||
Token.java
|
||||
ASCII_CharStream.java
|
||||
HTMLParserConstants.java
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene;
|
||||
package org.apache.lucene.demo;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene;
|
||||
package org.apache.lucene.demo;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene;
|
||||
package org.apache.lucene.demo;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
|
@ -56,7 +56,7 @@ package org.apache.lucene;
|
|||
|
||||
import java.io.*;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.HTMLParser.HTMLParser;
|
||||
import org.apache.lucene.demo.html.HTMLParser;
|
||||
|
||||
/** A utility for making Lucene Documents for HTML documents. */
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene;
|
||||
package org.apache.lucene.demo;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
|
@ -54,7 +54,7 @@ package org.apache.lucene;
|
|||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -65,9 +65,7 @@ class IndexFiles {
|
|||
try {
|
||||
Date start = new Date();
|
||||
|
||||
IndexWriter writer = new IndexWriter("index", new StopAnalyzer(), true);
|
||||
writer.mergeFactor = 20;
|
||||
|
||||
IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true);
|
||||
indexDocs(writer, new File(args[0]));
|
||||
|
||||
writer.optimize();
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene;
|
||||
package org.apache.lucene.demo;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
|
@ -54,11 +54,11 @@ package org.apache.lucene;
|
|||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.util.Arrays;
|
||||
import org.apache.lucene.HTMLParser.HTMLParser;
|
||||
import org.apache.lucene.demo.html.HTMLParser;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Date;
|
||||
|
@ -101,8 +101,7 @@ class IndexHTML {
|
|||
indexDocs(root, index, create);
|
||||
}
|
||||
|
||||
writer = new IndexWriter(index, new StopAnalyzer(), create);
|
||||
writer.mergeFactor = 20;
|
||||
writer = new IndexWriter(index, new StandardAnalyzer(), create);
|
||||
writer.maxFieldLength = 1000000;
|
||||
|
||||
indexDocs(root, index, create); // add new docs
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene;
|
||||
package org.apache.lucene.demo;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
|
@ -59,7 +59,7 @@ import java.io.BufferedReader;
|
|||
import java.io.InputStreamReader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -71,7 +71,7 @@ class SearchFiles {
|
|||
public static void main(String[] args) {
|
||||
try {
|
||||
Searcher searcher = new IndexSearcher("index");
|
||||
Analyzer analyzer = new StopAnalyzer();
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
|
||||
while (true) {
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.HTMLParser;
|
||||
package org.apache.lucene.demo.html;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
|
@ -63,7 +63,7 @@ options {
|
|||
|
||||
PARSER_BEGIN(HTMLParser)
|
||||
|
||||
package org.apache.lucene.HTMLParser;
|
||||
package org.apache.lucene.demo.html;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.HTMLParser;
|
||||
package org.apache.lucene.demo.html;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.HTMLParser;
|
||||
package org.apache.lucene.demo.html;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
|
@ -0,0 +1,8 @@
|
|||
To build the Jakarta Lucene web app demo just run
|
||||
"ant wardemo" from the Jakarta Lucene Installation
|
||||
directory (follow the master instructions in
|
||||
BUILD.txt). If you have questions please post
|
||||
them to the Jakarta Lucene mailing lists. To
|
||||
actually figure this out you really need to
|
||||
read the Lucene "Getting Started" guide provided
|
||||
with the doc build ("ant docs").
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
|
||||
<!DOCTYPE web-app
|
||||
PUBLIC "-//Sun Microsystems, Inc.//DTD Web Application 2.3//EN"
|
||||
"http://java.sun.com/dtd/web-app_2_3.dtd">
|
||||
|
||||
<web-app>
|
||||
|
||||
|
||||
</web-app>
|
|
@ -0,0 +1,7 @@
|
|||
<%
|
||||
/* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net) */
|
||||
String appTitle = "Jakarta Lucene Example - Intranet Server Search Application";
|
||||
/* make sure you point the below string to the index you created with IndexHTML */
|
||||
String indexLocation = "/opt/lucene/index";
|
||||
String appfooter = "Jakarta Lucene Template WebApp 1.0";
|
||||
%>
|
|
@ -0,0 +1,8 @@
|
|||
<% /* Author Andrew C. Oliver (acoliver2@users.sourceforge.net) */ %>
|
||||
<p>
|
||||
<center>
|
||||
<%=appfooter%>
|
||||
</center>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,12 @@
|
|||
<%@include file="configuration.jsp"%>
|
||||
<% /* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net */ %>
|
||||
<html>
|
||||
<header>
|
||||
<title><%=appTitle%></title>
|
||||
</header>
|
||||
<body>
|
||||
<center>
|
||||
<p>
|
||||
Welcome to the Lucene Template application. (This is the header)
|
||||
</p>
|
||||
</center>
|
|
@ -0,0 +1,14 @@
|
|||
<%@include file="header.jsp"%>
|
||||
<% /* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net) */ %>
|
||||
<center>
|
||||
<form name="search" action="results.jsp" method="get">
|
||||
<p>
|
||||
<input name="query" size="44"/> Search Criteria
|
||||
</p>
|
||||
<p>
|
||||
<input name="maxresults" size="4" value="100"/> Results Per Page
|
||||
<input type="submit" value="Search"/>
|
||||
</p>
|
||||
</form>
|
||||
</center>
|
||||
<%@include file="footer.jsp"%>
|
|
@ -0,0 +1,143 @@
|
|||
<%@ page import = " javax.servlet.*, javax.servlet.http.*, java.io.*, org.apache.lucene.analysis.*, org.apache.lucene.document.*, org.apache.lucene.index.*, org.apache.lucene.search.*, org.apache.lucene.queryParser.*, org.apache.lucene.demo.*, org.apache.lucene.demo.html.Entities" %>
|
||||
|
||||
<%
|
||||
/*
|
||||
Author: Andrew C. Oliver, SuperLink Software, Inc. (acoliver2@users.sourceforge.net)
|
||||
|
||||
This jsp page is deliberately written in the horrible java directly embedded
|
||||
in the page style for an easy and concise demonstration of Lucene.
|
||||
Do note...if you write pages that look like this...sooner or later
|
||||
you'll have a maintenance nightmare. If you use jsps...use taglibs
|
||||
and beans! That being said, this should be acceptable for a small
|
||||
page demonstrating how one uses Lucene in a web app.
|
||||
|
||||
This is also deliberately overcommented. ;-)
|
||||
|
||||
*/
|
||||
%>
|
||||
<%@include file="header.jsp"%>
|
||||
<%
|
||||
boolean error = false; //used to control flow for error messages
|
||||
String indexName = indexLocation; //local copy of the configuration variable
|
||||
IndexSearcher searcher = null; //the searcher used to open/search the index
|
||||
Query query = null; //the Query created by the QueryParser
|
||||
Hits hits = null; //the search results
|
||||
int startindex = 0; //the first index displayed on this page
|
||||
int maxpage = 50; //the maximum items displayed on this page
|
||||
String queryString = null; //the query entered in the previous page
|
||||
String startVal = null; //string version of startindex
|
||||
String maxresults = null; //string version of maxpage
|
||||
int thispage = 0; //used for the for/next either maxpage or
|
||||
//hits.length() - startindex - whichever is
|
||||
//less
|
||||
|
||||
try {
|
||||
searcher = new IndexSearcher(
|
||||
IndexReader.open(indexName) //create an indexSearcher for our page
|
||||
);
|
||||
} catch (Exception e) { //any error that happens is probably due
|
||||
//to a permission problem or non-existent
|
||||
//or otherwise corrupt index
|
||||
%>
|
||||
<p>ERROR opening the Index - contact sysadmin!</p>
|
||||
<p>While parsing query: <%=e.getMessage()%></p>
|
||||
<% error = true; //don't do anything up to the footer
|
||||
}
|
||||
%>
|
||||
<%
|
||||
if (error == false) { //did we open the index?
|
||||
queryString = request.getParameter("query"); //get the search criteria
|
||||
startVal = request.getParameter("startat"); //get the start index
|
||||
maxresults = request.getParameter("maxresults"); //get max results per page
|
||||
try {
|
||||
maxpage = Integer.parseInt(maxresults); //parse the max results first
|
||||
startindex = Integer.parseInt(startVal); //then the start index
|
||||
} catch (Exception e) { } //we don't care if something happens we'll just start at 0
|
||||
//or end at 50
|
||||
|
||||
|
||||
|
||||
if (queryString == null)
|
||||
throw new ServletException("no query "+ //if you don't have a query then
|
||||
"specified"); //you probably played on the
|
||||
//query string so you get the
|
||||
//treatment
|
||||
|
||||
Analyzer analyzer = new StopAnalyzer(); //construct our usual analyzer
|
||||
try {
|
||||
query = QueryParser.parse(queryString, "contents", analyzer); //parse the
|
||||
} catch (ParseException e) { //query and construct the Query
|
||||
//object
|
||||
//if it's just "operator error"
|
||||
//send them a nice error HTML
|
||||
|
||||
%>
|
||||
<p>Error While parsing query: <%=e.getMessage()%></p>
|
||||
<%
|
||||
error = true; //don't bother with the rest of
|
||||
//the page
|
||||
}
|
||||
}
|
||||
%>
|
||||
<%
|
||||
if (error == false && searcher != null) { // if we've had no errors
|
||||
// searcher != null was to handle
|
||||
// a weird compilation bug
|
||||
thispage = maxpage; // default last element to maxpage
|
||||
hits = searcher.search(query); // run the query
|
||||
if (hits.length() == 0) { // if we got no results tell the user
|
||||
%>
|
||||
<p> I'm sorry I couldn't find what you were looking for. </p>
|
||||
<%
|
||||
error = true; // don't bother with the rest of the
|
||||
// page
|
||||
}
|
||||
}
|
||||
|
||||
if (error == false && searcher != null) {
|
||||
%>
|
||||
<table>
|
||||
<tr>
|
||||
<td>Document</td>
|
||||
<td>Summary</td>
|
||||
</tr>
|
||||
<%
|
||||
if ((startindex + maxpage) > hits.length()) {
|
||||
thispage = hits.length() - startindex; // set the max index to maxpage or last
|
||||
} // actual search result whichever is less
|
||||
|
||||
for (int i = startindex; i < (thispage + startindex); i++) { // for each element
|
||||
%>
|
||||
<tr>
|
||||
<%
|
||||
Document doc = hits.doc(i); //get the next document
|
||||
String doctitle = doc.get("title"); //get its title
|
||||
String url = doc.get("url"); //get its url field
|
||||
if (doctitle.equals("")) //use the url if it has no title
|
||||
doctitle = url;
|
||||
//then output!
|
||||
%>
|
||||
<td><a href="<%=url%>"><%=doctitle%></a></td>
|
||||
<td><%=doc.get("summary")%></td>
|
||||
</tr>
|
||||
<%
|
||||
}
|
||||
%>
|
||||
<% if ( (startindex + maxpage) < hits.length()) { //if there are more results...display
|
||||
//the more link
|
||||
|
||||
String moreurl="results.jsp?query=" + queryString + //construct the "more" link
|
||||
"&maxresults=" + maxpage +
|
||||
"&startat=" + (startindex + maxpage);
|
||||
%>
|
||||
<tr>
|
||||
<td></td><td><a href="<%=moreurl%>">More Results>></a></td>
|
||||
</tr>
|
||||
<%
|
||||
}
|
||||
%>
|
||||
</table>
|
||||
|
||||
<% } //then include our footer.
|
||||
%>
|
||||
<%@include file="footer.jsp"%>
|
|
@ -58,7 +58,7 @@ import org.apache.lucene.analysis.SimpleAnalyzer;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.TermPositions;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.FileDocument;
|
||||
import org.apache.lucene.demo.FileDocument;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Date;
|
||||
|
|
|
@ -59,7 +59,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.FileDocument;
|
||||
import org.apache.lucene.demo.FileDocument;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Date;
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Jakarta Lucene - Building and Installing the Basic Demo</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About this Document">
|
||||
<p>
|
||||
This document is intended as a "getting started" guide to using and running the
|
||||
Jakarta Lucene demos. It walks you through some basic installation and configuration.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section name="About the Demos">
|
||||
<p>
|
||||
The Lucene Demo code is a set of command line example applications that demonstrate various
|
||||
functionality of Lucene and how one should go about adding it to their
|
||||
applications.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Setting your classpath">
|
||||
<p>
|
||||
First, extract the latest Lucene distribution.
|
||||
</p>
|
||||
<p>
|
||||
You should see the Jakarta Lucene jar file in the directory you created
|
||||
when you extracted the archive. It should be named something like
|
||||
<b>lucene-{version}.jar</b>.
|
||||
</p>
|
||||
<p>
|
||||
You should also see a file called <b>lucene-demos-{version}.jar</b>.
|
||||
Put both of these files in your Java CLASSPATH.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Indexing Files">
|
||||
<p>
|
||||
Once you've gotten this far you're probably itching to go. Let's <b> build an index!</b>
|
||||
Assuming you've set your classpath correctly, just type
|
||||
"java org.apache.lucene.demo.IndexFiles {full-path-to-lucene}/src". This will produce
|
||||
a subdirectory called "index" which will contain an index of all of the Lucene
|
||||
sourcecode.
|
||||
</p>
|
||||
<p>
|
||||
<b> To search the index </b> type "java org.apache.lucene.demo.SearchFiles". You'll be prompted
|
||||
for a query. Type in a swear word and press the enter key. You'll see that the Lucene
|
||||
developers are very well mannered and get no results. Now try entering the word "vector".
|
||||
That should return a whole bunch of documents. The results will page at every tenth
|
||||
result and ask you whether you want more results.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="About the code...">
|
||||
<p>
|
||||
<a href="demo2.html">read on>>></a>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Jakarta Lucene - Basic Demo Sources Walkthrough</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About the Code">
|
||||
<p>
|
||||
In this section we walk through the sources behind the basic Lucene demo such as where to
|
||||
find it, its parts and their function. This section is intended for Java developers
|
||||
wishing to understand how to use Jakarta Lucene in their applications.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section name="Location of the source">
|
||||
<p>
|
||||
Relative to the directory created when you extracted Lucene or retrieved it from CVS, you
|
||||
should see a directory called "src" which in turn contains a directory called "demo".
|
||||
This is the root for all of the Lucene demos. Under this directory is org/apache/lucene/demo,
|
||||
this is where all the Java sources live.
|
||||
</p>
|
||||
<p>
|
||||
Within this directory you should see the IndexFiles class we executed earlier. Bring that
|
||||
up in vi or your alternative text editor and let's take a look at it.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="IndexFiles">
|
||||
<p>
|
||||
As we discussed in the previous walkthrough, the IndexFiles class creates a Lucene Index.
|
||||
Let's take a look at how it does this.
|
||||
</p>
|
||||
<p>
|
||||
The first substantial thing the main function does is instantiate an instance
|
||||
of IndexWriter. It passes a string called "index" and a new instance of a class called
|
||||
"StandardAnalyzer". The "index" string is the name of the directory that all index information
|
||||
should be stored in. Because we're not passing any path information, one must assume this
|
||||
will be created as a subdirectory of the current directory (if it does not already exist). On
|
||||
some platforms this may actually result in it being created in other directories (such as
|
||||
the user's home directory).
|
||||
</p>
|
||||
<p>
|
||||
The <b>IndexWriter</b> is the main class responsible for creating indices. To use it you
|
||||
must instantiate it with a path that it can write the index into, if this path does not
|
||||
exist it will create it, otherwise it will refresh the index living at that path. You
|
||||
must also pass an instance of <b>org.apache.lucene.analysis.Analyzer</b>.
|
||||
</p>
|
||||
<p>
|
||||
The <b>Analyzer</b>, in this case the <b>StandardAnalyzer</b>, is little more than a standard Java
|
||||
Tokenizer, converting all strings to lowercase and filtering out useless words from the index.
|
||||
By useless words I mean common language words such as articles (a,an,the) and other words that
|
||||
would be useless for searching. It should be noted that there are different rules for every
|
||||
language, and you should use the proper analyzer for each. Lucene currently provides Analyzers
|
||||
for English and German.
|
||||
</p>
|
||||
<p>
|
||||
Looking down further in the file, you should see the indexDocs() code. This recursive function
|
||||
simply crawls the directories and uses FileDocument to create Document objects. The Document
|
||||
is simply a data object to represent the content in the file as well as its creation time and
|
||||
location. These instances are added to the indexWriter. Take a look inside FileDocument. It's
|
||||
not particularly complicated, it just adds fields to the Document.
|
||||
</p>
|
||||
<p>
|
||||
As you can see there isn't much to creating an index. The devil is in the details. You may also
|
||||
wish to examine the other samples in this directory, particularly the IndexHTML class. It is
|
||||
a bit more complex but builds upon this example.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Searching Files">
|
||||
<p>
|
||||
The SearchFiles class is quite simple. It primarily collaborates with an IndexSearcher, StandardAnalyzer
|
||||
(which is used in the IndexFiles class as well) and a QueryParser. The query parser is constructed
|
||||
with an analyzer used to interpret your query in the same way the Index was interpreted: finding
|
||||
the end of words and removing useless words like 'a', 'an' and 'the'. The Query object contains the
|
||||
results from the QueryParser which is passed to the searcher. The searcher results are returned in
|
||||
a collection of Documents called "Hits" which is then iterated through and displayed to the user.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="The Web example...">
|
||||
<p>
|
||||
<a href="demo3.html">read on>>></a>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<document>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Jakarta Lucene - Building and Installing the Basic Demo</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About this Document">
|
||||
<p>
|
||||
This document is intended as a "getting started" guide to installing and running the
|
||||
Jakarta Lucene web application demo. This guide assumes that you have read the
|
||||
information in the previous two examples or already know it anyhow. We'll use
|
||||
Tomcat 4.0.1 as our reference web container. These demos should work with nearly
|
||||
any container, but it is up to you to adapt them appropriately.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section name="About the Demos">
|
||||
<p>
|
||||
The Lucene Web Application demo is a template web application intended for deployment
|
||||
on Tomcat or a similar web container. It's NOT designed as a "best practices"
|
||||
implementation by ANY means. It's more of a "hello world" type Lucene Web App.
|
||||
The purpose of this application is to demonstrate Lucene. With that being said,
|
||||
it should be relatively simple to create a small searchable website in Tomcat or
|
||||
a similar application server.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Indexing Files">
|
||||
<p>
|
||||
Once you've gotten this far you're probably itching to go.
|
||||
Let's start by creating the index you'll need for the web examples.
|
||||
Since you've already set your classpath in the previous examples,
|
||||
all you need to do is type
|
||||
<b> "java org.apache.lucene.demo.IndexHTML -create -index {index-dir} .."</b>.
|
||||
You'll need to do this from your {tomcat}/webapps/luceneweb directory. {index-dir}
|
||||
should be a directory that Tomcat has permission to read and write, but is
|
||||
outside of a web accessible context. By default the webapp is configured
|
||||
to look in <b>/opt/lucene/index</b> for this index.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Deploying the Demos">
|
||||
<p>Located in your distribution directory you should see
|
||||
a war file called luceneweb.war. Copy this to your
|
||||
{tomcat-home}/webapps directory. You may need to restart
|
||||
Tomcat. </p>
|
||||
</section>
|
||||
|
||||
<section name="Configuration">
|
||||
<p>
|
||||
From your Tomcat directory look in the webapps/luceneweb subdirectory. If it's not
|
||||
present, try browsing to "http://localhost:8080/luceneweb" then look again.
|
||||
Edit a file called configuration.jsp. Ensure that the indexLocation is equal to the
|
||||
location you used for your index. You may also customize the appTitle and appfooter
|
||||
strings as you see fit. Once you have finished altering the configuration you should
|
||||
restart Tomcat. You may also wish to update the war file by typing
|
||||
<b>jar -uf luceneweb.war configuration.jsp</b> from the luceneweb subdirectory.
|
||||
(The u option is not available in all versions of jar. In this case recreate the war file).
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Running the Demos">
|
||||
<p>Now you're ready to roll. In your browser set the url to "http://localhost:8080/luceneweb"
|
||||
enter "test" and the number of items per page and press search.</p>
|
||||
<p>You should now be looking either at a number of results (provided you didn't erase the
|
||||
Tomcat examples) or nothing. Try other search terms. Depending on the number of items
|
||||
per page you set and results returned, there may be a link at the bottom that says "more results>>",
|
||||
clicking it goes to subsequent pages. If you get an error regarding opening the index, then you
|
||||
probably set the path in "configuration" incorrectly or Tomcat doesn't have permissions to the
|
||||
index (or you skipped the step of creating it).</p>
|
||||
</section>
|
||||
|
||||
<section name="About the code...">
|
||||
<p>
|
||||
If you want to know more about how this web app works or how to customize it then
|
||||
<a href="demo4.html">read on>>></a>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Jakarta Lucene - Basic Demo Sources Walkthrough</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About the Code">
|
||||
<p>
|
||||
In this section we walk through the sources behind the basic Lucene Web Application demo.
|
||||
Where to find it, its parts, and their function. This section is intended for Java developers
|
||||
wishing to understand how to use Jakarta Lucene in their applications or for those involved
|
||||
in deploying web applications based on Lucene.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section name="Location of the source (developers/deployers)">
|
||||
<p>
|
||||
Relative to the directory created when you extracted Lucene or retrieved it from CVS, you
|
||||
should see a directory called "src" which in turn contains a directory called "jsp".
|
||||
This is the root for all of the Lucene web demo.
|
||||
</p>
|
||||
<p>
|
||||
Within this directory you should see the index.jsp file. Bring this up in vi or your
|
||||
editor of choice.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="index.jsp (developers/deployers)">
|
||||
<p>
|
||||
This jsp page is pretty boring by itself. All it does is include a header, display a form and
|
||||
include a footer. If you look at the form, it has two fields: query (where you enter your
|
||||
search criteria) and maxresults where you specify the number of results per page. If you look
|
||||
at the form tag, you'll notice it uses the get method as opposed to the post. While this is
|
||||
considered deprecated functionality by the latest w3c specs, it's unlikely to go away due to the
|
||||
usefulness of being able to bookmark things like searches. By the structure of this JSP it should
|
||||
be easy to customize it without even editing this particular file. You could simply change the
|
||||
header and footer. Let's look at the header.jsp (located in the same directory) next.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="header.jsp (developers/deployers)">
|
||||
<p>
|
||||
The header is also very simple by itself. The only thing it does is include the configuration.jsp
|
||||
(which you looked at in the last section of this guide) and set the title and a brief header. This
|
||||
would be a good place to put your own custom HTML to "pretty" things up a bit. We won't cover the
|
||||
footer because all it does is display the footer and close your tags. Let's look at the results.jsp,
|
||||
the meat of this application next.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="results.jsp (developers)">
|
||||
<p>
|
||||
The results.jsp has a lot more functionality. Much of it is for paging the search results; we'll not
|
||||
cover this as it's commented well enough. It does not perform any optimizations such as caching results,
|
||||
etc. as that would make this a more complex example. The first thing in this page is the actual imports
|
||||
for the Lucene classes and Lucene demo classes. These classes are loaded from the jars included in the
|
||||
WEB-INF/lib directory in the final war file.
|
||||
</p>
|
||||
<p>
|
||||
You'll notice that this file includes the same header and footer as the "index.jsp". From there the jsp
|
||||
constructs an IndexSearcher with the "indexLocation" that was specified in the "configuration.jsp". If there
|
||||
is an error of any kind in opening the index, it is displayed to the user and a boolean flag is set to tell
|
||||
the rest of the sections of the jsp not to continue.
|
||||
</p>
|
||||
<p>
|
||||
From there, this jsp attempts to get the search criteria, the start index (used for paging) and the maximum
|
||||
number of results per page. If the maximum results per page is not set or not valid then it and the
|
||||
start index are set to default values. If only the start index is invalid it is set to a default value. If
|
||||
the criteria isn't provided then a servlet error is thrown (it is assumed that this is the result of url tampering
|
||||
or some form of browser malfunction).
|
||||
</p>
|
||||
<p>
|
||||
The jsp moves on to construct a StandardAnalyzer, just as in the simple demo, to analyze the search criteria; it
|
||||
is passed to the QueryParser along with the criteria to construct a Query object. You'll also notice the
|
||||
string literal "contents" included. This is to specify the search should include the contents and not
|
||||
the title, url or some other field in the indexed documents. If there is any error in constructing a Query
|
||||
object an error is displayed to the user.
|
||||
</p>
|
||||
<p>
|
||||
In the next section of the jsp the IndexSearcher is asked to search given the query object. The results are
|
||||
returned in a collection called "hits". If the length property of the hits collection is 0 then an error
|
||||
is displayed to the user and the error flag is set.
|
||||
</p>
|
||||
<p>
|
||||
Finally the jsp iterates through the hits collection and displays properties of the "Document" objects we talked
|
||||
about in the first walkthrough. These objects contain "known" fields specific to their indexer (in this case
|
||||
"IndexHTML" constructs a document with "url", "title" and "contents"). You'll notice that these results are paged
|
||||
but the search is repeated every time. This is an area where optimization could improve performance for large
|
||||
result sets.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="More sources (developers)">
|
||||
<p>
|
||||
There are additional sources used by the web app that were not specifically covered by either walkthrough. For
|
||||
example the HTML parser, the IndexHTML class and HTMLDocument class. These are very similar to the classes
|
||||
covered in the first example, however they have properties specific to parsing and indexing HTML. This is
|
||||
beyond our scope; however, by now you should feel like you're "getting started" with Lucene.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Where to go from here? (Everyone!)">
|
||||
<p>
|
||||
There are a number of things this demo doesn't do or doesn't do quite right. For instance, you may
|
||||
have noticed that documents in the root context are unreachable (unless you reconfigure Tomcat to
|
||||
support that context or redirect to it), anywhere where the directory doesn't quite match the context mapping,
|
||||
you'll have a broken link in your results. If you want to index non-local files or have some other
|
||||
needs this isn't supported, plus there may be security issues with running the indexing application from
|
||||
your webapps directory. There are a number of things left for you the implementor or developer to do.
|
||||
</p>
|
||||
<p>
|
||||
In time some of these things may be added to Lucene as features (if you've got a good idea we'd love to hear it!),
|
||||
but for now: this is where you begin and the search engine/indexer ends. Lastly, one would assume you'd
|
||||
want to follow the above advice and customize the application to look a little more fancy than black on
|
||||
white with "Lucene Template" at the top. We'll see you on the Lucene Users' or Developers' mailing lists!
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="When to contact the Author">
|
||||
<p>
|
||||
Please resist the urge to contact the authors of this document (without bribes of fame and fortune attached). First
|
||||
contact the <a href="http://jakarta.apache.org/site/mail.html">mailing lists</a>. That being said, feedback
|
||||
and modifications to this document and samples are ever so greatly appreciated. They are just best sent to the
|
||||
lists so that everyone can share in them. Certainly you'll get the most help there as well.
|
||||
Thanks for understanding.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
<menu name="Documentation">
|
||||
<item name="FAQ" href="http://www.lucene.com/cgi-bin/faq/faqmanager.cgi" target="_blank"/>
|
||||
<item name="Getting Started" href="/gettingstarted.html"/>
|
||||
<item name="Articles" href="/resources.html"/>
|
||||
<item name="Javadoc" href="/api/index.html"/>
|
||||
</menu>
|
||||
|
|
Loading…
Reference in New Issue