Obtained from:
Reviewed by:	Doug Cutting / Lucene Community
new demo build target
added getting started guide
modified tests
moved demo to demo subpackage
added war demo


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149646 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew C. Oliver 2002-01-26 15:01:32 +00:00
parent d0473acefc
commit e120b8bd51
30 changed files with 657 additions and 41 deletions

View File

@ -52,14 +52,14 @@ releases are available for download at:
Download either a zip or a tarred/gzipped version of the archive, and
uncompress it into a directory of your choice.
Step 3) Connect to the top-level of your Lucene installation
Step 2) Connect to the top-level of your Lucene installation
Lucene's top-level directory contains the build.properties and
build.xml files. You don't need to change any of the settings in
these files, but you do need to run ant from this location so it knows
where to find them.
Step 4) Run ant.
Step 3) Run ant.
Assuming you have ant in your PATH and have set ANT_HOME to the
location of your ant installation, typing "ant" at the shell prompt

View File

@ -14,6 +14,7 @@ docs.dest = ./docs
src.dir = ./src/java
demo.src = ./src/demo
demo.jsp = ./src/jsp
test.src = ./src/test
docs.dir = ./docs
lib.dir = ./lib
@ -37,6 +38,8 @@ build.src = ${build.dir}/src
build.demo = ${build.dir}/demo
build.demo.src = ${build.demo}/src
build.demo.classes = ${build.demo}/classes
build.demo.name = ${name}-demos-${version}
build.war.name = luceneweb
build.test = ${build.dir}/test
build.test.src = ${build.test}/src

View File

@ -121,6 +121,45 @@
/>
</target>
<!-- Bundles the compiled demo classes into ${build.demo}/${build.demo.name}.jar. -->
<!-- Skipped unless the javacc.present property is set. -->
<target name="jardemo" depends="compile,demo" if="javacc.present">
  <jar jarfile="${build.demo}/${build.demo.name}.jar" basedir="${build.demo.classes}">
    <exclude name="**/*.java"/>
  </jar>
</target>
<!-- Assembles the demo web application under ${build.demo}/${build.war.name} -->
<!-- and archives it as ${build.demo}/${build.war.name}.war. -->
<!-- Skipped unless the javacc.present property is set. -->
<target name="wardemo" depends="compile,demo,jar,jardemo" if="javacc.present">
  <!-- mkdir also creates the missing parent directories (webapp root and WEB-INF) -->
  <mkdir dir="${build.demo}/${build.war.name}/WEB-INF/lib"/>

  <!-- JSP pages and XML descriptors keep their relative paths -->
  <copy todir="${build.demo}/${build.war.name}">
    <fileset dir="${demo.jsp}" includes="**/*.jsp,**/*.xml"/>
  </copy>

  <!-- jars found directly in the build and demo build directories go into WEB-INF/lib -->
  <copy todir="${build.demo}/${build.war.name}/WEB-INF/lib">
    <fileset dir="${build.dir}" includes="*.jar"/>
    <fileset dir="${build.demo}" includes="*.jar"/>
  </copy>

  <jar jarfile="${build.demo}/${build.war.name}.war" basedir="${build.demo}/${build.war.name}">
    <exclude name="**/*.java"/>
  </jar>
</target>
<!-- ================================================================== -->
<!-- J A R S O U R C E -->
<!-- ================================================================== -->
@ -163,9 +202,9 @@
</copy>
<javacc
target="${build.demo.src}/org/apache/lucene/HTMLParser/HTMLParser.jj"
target="${build.demo.src}/org/apache/lucene/demo/html/HTMLParser.jj"
javacchome="${javacc.zip.dir}"
outputdirectory="${build.demo.src}/org/apache/lucene/HTMLParser"
outputdirectory="${build.demo.src}/org/apache/lucene/demo/html"
/>
<mkdir dir="${build.demo.classes}"/>
@ -321,7 +360,7 @@
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="package" depends="jar, javadocs, demo">
<target name="package" depends="jar, javadocs, demo, wardemo">
<mkdir dir="${dist.dir}"/>
<mkdir dir="${dist.dir}/docs"/>
<mkdir dir="${dist.dir}/docs/api"/>
@ -339,6 +378,7 @@
<fileset dir="${build.demo.classes}"/>
</copy>
<copy todir="${dist.dir}/src">
<fileset dir="src"/>
</copy>
@ -353,6 +393,8 @@
</fileset>
</copy>
<copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
<copy file="${build.demo}/${build.demo.name}.jar" todir="${dist.dir}"/>
<copy file="${build.demo}/${build.war.name}.war" todir="${dist.dir}"/>
</target>
<!-- ================================================================== -->

View File

@ -6,12 +6,13 @@
javax.servlet.*
javax.servlet.http.*
java.io.*
com.lucene.analysis.*
com.lucene.document.*
com.lucene.index.*
com.lucene.search.*
com.lucene.queryParser.*
demo.HTMLParser.Entities
org.apache.lucene.analysis.*
org.apache.lucene.document.*
org.apache.lucene.index.*
org.apache.lucene.search.*
org.apache.lucene.queryParser.*
org.apache.lucene.demo.*
org.apache.lucene.demo.html.Entities
</java>
<java>

View File

@ -1,7 +0,0 @@
HTMLParser.java
HTMLParserTokenManager.java
TokenMgrError.java
ParseException.java
Token.java
ASCII_CharStream.java
HTMLParserConstants.java

View File

@ -1,4 +1,4 @@
package org.apache.lucene;
package org.apache.lucene.demo;
/* ====================================================================
* The Apache Software License, Version 1.1

View File

@ -1,4 +1,4 @@
package org.apache.lucene;
package org.apache.lucene.demo;
/* ====================================================================
* The Apache Software License, Version 1.1

View File

@ -1,4 +1,4 @@
package org.apache.lucene;
package org.apache.lucene.demo;
/* ====================================================================
* The Apache Software License, Version 1.1
@ -56,7 +56,7 @@ package org.apache.lucene;
import java.io.*;
import org.apache.lucene.document.*;
import org.apache.lucene.HTMLParser.HTMLParser;
import org.apache.lucene.demo.html.HTMLParser;
/** A utility for making Lucene Documents for HTML documents. */

View File

@ -1,4 +1,4 @@
package org.apache.lucene;
package org.apache.lucene.demo;
/* ====================================================================
* The Apache Software License, Version 1.1
@ -54,7 +54,7 @@ package org.apache.lucene;
* <http://www.apache.org/>.
*/
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import java.io.File;
@ -65,9 +65,7 @@ class IndexFiles {
try {
Date start = new Date();
IndexWriter writer = new IndexWriter("index", new StopAnalyzer(), true);
writer.mergeFactor = 20;
IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true);
indexDocs(writer, new File(args[0]));
writer.optimize();

View File

@ -1,4 +1,4 @@
package org.apache.lucene;
package org.apache.lucene.demo;
/* ====================================================================
* The Apache Software License, Version 1.1
@ -54,11 +54,11 @@ package org.apache.lucene;
* <http://www.apache.org/>.
*/
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.Arrays;
import org.apache.lucene.HTMLParser.HTMLParser;
import org.apache.lucene.demo.html.HTMLParser;
import java.io.File;
import java.util.Date;
@ -101,8 +101,7 @@ class IndexHTML {
indexDocs(root, index, create);
}
writer = new IndexWriter(index, new StopAnalyzer(), create);
writer.mergeFactor = 20;
writer = new IndexWriter(index, new StandardAnalyzer(), create);
writer.maxFieldLength = 1000000;
indexDocs(root, index, create); // add new docs

View File

@ -1,4 +1,4 @@
package org.apache.lucene;
package org.apache.lucene.demo;
/* ====================================================================
* The Apache Software License, Version 1.1
@ -59,7 +59,7 @@ import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
@ -71,7 +71,7 @@ class SearchFiles {
public static void main(String[] args) {
try {
Searcher searcher = new IndexSearcher("index");
Analyzer analyzer = new StopAnalyzer();
Analyzer analyzer = new StandardAnalyzer();
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
while (true) {

View File

@ -1,4 +1,4 @@
package org.apache.lucene.HTMLParser;
package org.apache.lucene.demo.html;
/* ====================================================================
* The Apache Software License, Version 1.1

View File

@ -63,7 +63,7 @@ options {
PARSER_BEGIN(HTMLParser)
package org.apache.lucene.HTMLParser;
package org.apache.lucene.demo.html;
import java.io.*;

View File

@ -1,4 +1,4 @@
package org.apache.lucene.HTMLParser;
package org.apache.lucene.demo.html;
/* ====================================================================
* The Apache Software License, Version 1.1

View File

@ -1,4 +1,4 @@
package org.apache.lucene.HTMLParser;
package org.apache.lucene.demo.html;
/* ====================================================================
* The Apache Software License, Version 1.1

8
src/jsp/README.txt Normal file
View File

@ -0,0 +1,8 @@
To build the Jakarta Lucene web app demo just run
"ant wardemo" from the Jakarta Lucene Installation
directory (follow the master instructions in
BUILD.txt). If you have questions please post
them to the Jakarta Lucene mailing lists. To
actually figure this out you really need to
read the Lucene "Getting Started" guide provided
with the doc build ("ant docs").

10
src/jsp/WEB-INF/web.xml Executable file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE web-app
PUBLIC "-//Sun Microsystems, Inc.//DTD Web Application 2.3//EN"
"http://java.sun.com/dtd/web-app_2_3.dtd">
<web-app>
</web-app>

View File

@ -0,0 +1,7 @@
<%
/* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net) */
/*
 * Shared settings for the demo web application. This file is pulled in by
 * header.jsp with a static include, so these variables are visible to every
 * page that includes the header.
 */
/* page title, written into the <title> element by header.jsp */
String appTitle = "Jakarta Lucene Example - Intranet Server Search Application";
/* make sure you point the below string to the index you created with IndexHTML */
/* (results.jsp opens the index at this location) */
String indexLocation = "/opt/lucene/index";
/* text shown at the bottom of every page by footer.jsp */
String appfooter = "Jakarta Lucene Template WebApp 1.0";
%>

8
src/jsp/footer.jsp Normal file
View File

@ -0,0 +1,8 @@
<% /* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net) */ %>
<%--
  Common page footer: prints the appfooter string and closes the <body> and
  <html> elements. The <center> element wraps the paragraph (not the other
  way round) because a block element may not be nested inside <p>.
--%>
<center>
<p>
<%=appfooter%>
</p>
</center>
</body>
</html>

12
src/jsp/header.jsp Normal file
View File

@ -0,0 +1,12 @@
<%@include file="configuration.jsp"%>
<% /* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net) */ %>
<%--
  Common page header: pulls in the configuration variables, opens the <html>
  and <body> elements and prints a short banner. The document head must be a
  <head> element so that the <title> is recognised as the page title.
--%>
<html>
<head>
<title><%=appTitle%></title>
</head>
<body>
<center>
<p>
Welcome to the Lucene Template application. (This is the header)
</p>
</center>

14
src/jsp/index.jsp Executable file
View File

@ -0,0 +1,14 @@
<%@include file="header.jsp"%>
<% /* Author: Andrew C. Oliver (acoliver2@users.sourceforge.net) */ %>
<%--
  Search form. It is submitted to results.jsp with the GET method and carries
  two request parameters:
    query      - the search criteria typed by the user
    maxresults - the number of results to show per page (pre-filled with 100)
--%>
<center>
<form name="search" action="results.jsp" method="get">
<p>
<input name="query" size="44"/>&nbsp;Search Criteria
</p>
<p>
<input name="maxresults" size="4" value="100"/>&nbsp;Results Per Page&nbsp;
<input type="submit" value="Search"/>
</p>
</form>
</center>
<%@include file="footer.jsp"%>

143
src/jsp/results.jsp Executable file
View File

@ -0,0 +1,143 @@
<%@ page import = " javax.servlet.*, javax.servlet.http.*, java.io.*, org.apache.lucene.analysis.*, org.apache.lucene.analysis.standard.StandardAnalyzer, org.apache.lucene.document.*, org.apache.lucene.index.*, org.apache.lucene.search.*, org.apache.lucene.queryParser.*, org.apache.lucene.demo.*, org.apache.lucene.demo.html.Entities" %>
<%
/*
        Author: Andrew C. Oliver, SuperLink Software, Inc. (acoliver2@users.sourceforge.net)
        This jsp page is deliberately written in the horrible java directly embedded
        in the page style for an easy and concise demonstration of Lucene.
        Do note...if you write pages that look like this...sooner or later
        you'll have a maintenance nightmare.  If you use jsps...use taglibs
        and beans!  That being said, this should be acceptable for a small
        page demonstrating how one uses Lucene in a web app.
        This is also deliberately overcommented. ;-)
*/
%>
<%!
    /**
     * Escapes the characters that have a special meaning in HTML so that
     * request parameters, exception messages and stored index fields can be
     * written into the page (element content or quoted attribute values)
     * without being interpreted as markup.
     *
     * @param text the raw text; may be null
     * @return the escaped text, or an empty string when text is null
     */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuffer escaped = new StringBuffer(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
%>
<%@include file="header.jsp"%>
<%
    boolean error = false;               //used to control flow for error messages
    String indexName = indexLocation;    //local copy of the configuration variable
    IndexSearcher searcher = null;       //the searcher used to open/search the index
    Query query = null;                  //the Query created by the QueryParser
    Hits hits = null;                    //the search results
    int startindex = 0;                  //the first index displayed on this page
    int maxpage = 50;                    //the maximum items displayed on this page
    String queryString = null;           //the query entered in the previous page
    String startVal = null;              //string version of startindex
    String maxresults = null;            //string version of maxpage
    int thispage = 0;                    //used for the for/next: either maxpage or
                                         //hits.length() - startindex, whichever is less

    try {
        //create an IndexSearcher for our page
        searcher = new IndexSearcher(IndexReader.open(indexName));
    } catch (Exception e) {
        //any error that happens here is probably due to a permission problem
        //or a non-existent or otherwise corrupt index
%>
<p>ERROR opening the Index - contact sysadmin!</p>
<p>While opening index: <%=escapeHtml(e.getMessage())%></p>
<%
        error = true;                    //don't do anything up to the footer
    }

    try {                                //the finally below releases the searcher
                                         //however this section is left
        if (!error) {                    //did we open the index?
            queryString = request.getParameter("query");       //get the search criteria
            startVal = request.getParameter("startat");        //get the start index
            maxresults = request.getParameter("maxresults");   //get max results per page
            try {
                maxpage = Integer.parseInt(maxresults);        //parse the max results first
                startindex = Integer.parseInt(startVal);       //then the start index
            } catch (NumberFormatException ignored) {
                //a missing or malformed number simply leaves the default in place
                //(start at 0 and/or 50 results per page)
            }
            if (maxpage < 1) {           //a non-positive page size makes no sense
                maxpage = 50;
            }
            if (startindex < 0) {        //a negative offset would make hits.doc() fail
                startindex = 0;
            }

            if (queryString == null) {
                //if you don't have a query then you probably played with the
                //query string, so you get the treatment
                throw new ServletException("no query specified");
            }

            //use the same analyzer IndexHTML used to build the index, otherwise
            //query terms are tokenized differently from the indexed terms
            Analyzer analyzer = new StandardAnalyzer();
            try {
                //parse the query and construct the Query object
                query = QueryParser.parse(queryString, "contents", analyzer);
            } catch (ParseException e) {
                //if it's just "operator error" send them a nice error HTML;
                //the message echoes user input, so it must be escaped
%>
<p>Error While parsing query: <%=escapeHtml(e.getMessage())%></p>
<%
                error = true;            //don't bother with the rest of the page
            }
        }

        if (!error && searcher != null) {    // if we've had no errors
                                             // searcher != null was to handle
                                             // a weird compilation bug
            thispage = maxpage;              // default last element to maxpage
            hits = searcher.search(query);   // run the query
            if (hits.length() == 0) {        // if we got no results tell the user
%>
<p> I'm sorry I couldn't find what you were looking for. </p>
<%
                error = true;                // don't bother with the rest of the page
            }
        }

        if (!error && searcher != null) {
%>
<table>
<tr>
<td>Document</td>
<td>Summary</td>
</tr>
<%
            if ((startindex + maxpage) > hits.length()) {
                thispage = hits.length() - startindex;   // set the max index to maxpage or last
            }                                            // actual search result whichever is less

            for (int i = startindex; i < (thispage + startindex); i++) {  // for each element
%>
<tr>
<%
                Document doc = hits.doc(i);              //get the next document
                String doctitle = doc.get("title");      //get its title
                String url = doc.get("url");             //get its url field
                if (doctitle == null || doctitle.equals("")) {
                    doctitle = url;                      //use the url if it has no title
                }
                //then output!
%>
<td><a href="<%=escapeHtml(url)%>"><%=escapeHtml(doctitle)%></a></td>
<td><%=escapeHtml(doc.get("summary"))%></td>
</tr>
<%
            }

            if ((startindex + maxpage) < hits.length()) {   //if there are more results...display
                                                            //the more link
                //the query is URL-encoded so that spaces, '&' and similar characters
                //survive the round trip through the link
                String moreurl = "results.jsp?query=" + java.net.URLEncoder.encode(queryString, "UTF-8") +
                                 "&maxresults=" + maxpage +
                                 "&startat=" + (startindex + maxpage);
%>
<tr>
<td></td><td><a href="<%=escapeHtml(moreurl)%>">More Results>></a></td>
</tr>
<%
            }
%>
</table>
<%
        }
    } finally {
        if (searcher != null) {
            searcher.close();            //release the index files held by this request
        }
    }
    //then include our footer.
%>
<%@include file="footer.jsp"%>

View File

@ -58,7 +58,7 @@ import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.document.Document;
import org.apache.lucene.FileDocument;
import org.apache.lucene.demo.FileDocument;
import java.io.File;
import java.util.Date;

View File

@ -59,7 +59,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.FileDocument;
import org.apache.lucene.demo.FileDocument;
import java.io.File;
import java.util.Date;

65
xdocs/demo.xml Normal file
View File

@ -0,0 +1,65 @@
<?xml version="1.0"?>
<document>
<properties>
<author email="acoliver@apache.org">Andrew C. Oliver</author>
<title>Jakarta Lucene - Building and Installing the Basic Demo</title>
</properties>
<body>
<section name="About this Document">
<p>
This document is intended as a "getting started" guide to using and running the
Jakarta Lucene demos. It walks you through some basic installation and configuration.
</p>
</section>
<section name="About the Demos">
<p>
The Lucene Demo code is a set of command line example applications that demonstrate various
functionality of Lucene and how one should go about adding it to their
applications.
</p>
</section>
<section name="Setting your classpath">
<p>
First, extract the latest Lucene distribution.
</p>
<p>
You should see the Jakarta Lucene jar file in the directory you created
when you extracted the archive. It should be named something like
<b>lucene-{version}.jar</b>.
</p>
<p>
You should also see a file called <b>lucene-demos-{version}.jar</b>.
Put both of these files in your Java CLASSPATH.
</p>
</section>
<section name="Indexing Files">
<p>
Once you've gotten this far you're probably itching to go. Let's <b> build an index!</b>
Assuming you've set your classpath correctly, just type
"java org.apache.lucene.demo.IndexFiles {full-path-to-lucene}/src". This will produce
a subdirectory called "index" which will contain an index of all of the Lucene
sourcecode.
</p>
<p>
<b> To search the index </b> type "java org.apache.lucene.demo.SearchFiles". You'll be prompted
for a query. Type in a swear word and press the enter key. You'll see that the Lucene
developers are very well mannered and get no results. Now try entering the word "vector".
That should return a whole bunch of documents. The results will page at every tenth
result and ask you whether you want more results.
</p>
</section>
<section name="About the code...">
<p>
<a href="demo2.html">read on&gt;&gt;&gt;</a>
</p>
</section>
</body>
</document>

92
xdocs/demo2.xml Normal file
View File

@ -0,0 +1,92 @@
<?xml version="1.0"?>
<document>
<properties>
<author email="acoliver@apache.org">Andrew C. Oliver</author>
<title>Jakarta Lucene - Basic Demo Sources Walkthrough</title>
</properties>
<body>
<section name="About the Code">
<p>
In this section we walk through the sources behind the basic Lucene demo such as where to
find it, its parts and their function. This section is intended for Java developers
wishing to understand how to use Jakarta Lucene in their applications.
</p>
</section>
<section name="Location of the source">
<p>
Relative to the directory created when you extracted Lucene or retrieved it from CVS, you
should see a directory called "src" which in turn contains a directory called "demo".
This is the root for all of the Lucene demos. Under this directory is org/apache/lucene/demo,
this is where all the Java sources live.
</p>
<p>
Within this directory you should see the IndexFiles class we executed earlier. Bring that
up in vi or your alternative text editor and let's take a look at it.
</p>
</section>
<section name="IndexFiles">
<p>
As we discussed in the previous walkthrough, the IndexFiles class creates a Lucene Index.
Let's take a look at how it does this.
</p>
<p>
The first substantial thing the main function does is instantiate an instance
of IndexWriter. It passes a string called "index" and a new instance of a class called
"StandardAnalyzer". The "index" string is the name of the directory that all index information
should be stored in. Because we're not passing any path information, one must assume this
will be created as a subdirectory of the current directory (if it does not already exist). On
some platforms this may actually result in it being created in other directories (such as
the user's home directory).
</p>
<p>
The <b>IndexWriter</b> is the main class responsible for creating indices. To use it you
must instantiate it with a path that it can write the index into; if this path does not
exist it will create it, otherwise it will refresh the index living at that path. You
must also pass an instance of <b>org.apache.lucene.analysis.Analyzer</b>.
</p>
<p>
The <b>Analyzer</b>, in this case the <b>StandardAnalyzer</b>, is little more than a standard Java
Tokenizer, converting all strings to lowercase and filtering out useless words from the index.
By useless words I mean common language words such as articles (a,an,the) and other words that
would be useless for searching. It should be noted that there are different rules for every
language, and you should use the proper analyzer for each. Lucene currently provides Analyzers
for English and German.
</p>
<p>
Looking down further in the file, you should see the indexDocs() code. This recursive function
simply crawls the directories and uses FileDocument to create Document objects. The Document
is simply a data object to represent the content in the file as well as its creation time and
location. These instances are added to the indexWriter. Take a look inside FileDocument. It's
not particularly complicated, it just adds fields to the Document.
</p>
<p>
As you can see there isn't much to creating an index. The devil is in the details. You may also
wish to examine the other samples in this directory, particularly the IndexHTML class. It is
a bit more complex but builds upon this example.
</p>
</section>
<section name="Searching Files">
<p>
The SearchFiles class is quite simple. It primarily collaborates with an IndexSearcher, StandardAnalyzer
(which is used in the IndexFiles class as well) and a QueryParser. The query parser is constructed
with an analyzer used to interpret your query in the same way the Index was interpreted: finding
the end of words and removing useless words like 'a', 'an' and 'the'. The Query object contains the
results from the QueryParser which is passed to the searcher. The searcher results are returned in
a collection of Documents called "Hits" which is then iterated through and displayed to the user.
</p>
</section>
<section name="The Web example...">
<p>
<a href="demo3.html">read on&gt;&gt;&gt;</a>
</p>
</section>
</body>
</document>

86
xdocs/demo3.xml Normal file
View File

@ -0,0 +1,86 @@
<?xml version="1.0"?>
<document>
<properties>
<author email="acoliver@apache.org">Andrew C. Oliver</author>
<title>Jakarta Lucene - Building and Installing the Web Application Demo</title>
</properties>
<body>
<section name="About this Document">
<p>
This document is intended as a "getting started" guide to installing and running the
Jakarta Lucene web application demo. This guide assumes that you have read the
information in the previous two examples or already know it anyhow. We'll use
Tomcat 4.0.1 as our reference web container. These demos should work with nearly
any container, but it is up to you to adapt them appropriately.
</p>
</section>
<section name="About the Demos">
<p>
The Lucene Web Application demo is a template web application intended for deployment
on Tomcat or a similar web container. It's NOT designed as a "best practices"
implementation by ANY means. It's more of a "hello world" type Lucene Web App.
The purpose of this application is to demonstrate Lucene. With that being said,
it should be relatively simple to create a small searchable website in Tomcat or
a similar application server.
</p>
</section>
<section name="Indexing Files">
<p>
Once you've gotten this far you're probably itching to go.
Let's start by creating the index you'll need for the web examples.
Since you've already set your classpath in the previous examples,
all you need to do is type
<b> "java org.apache.lucene.demo.IndexHTML -create -index {index-dir} .."</b>.
You'll need to do this from your {tomcat}/webapps/luceneweb directory. {index-dir}
should be a directory that Tomcat has permission to read and write, but is
outside of a web accessible context. By default the webapp is configured
to look in <b>/opt/lucene/index</b> for this index.
</p>
</section>
<section name="Deploying the Demos">
<p>Located in your distribution directory you should see
a war file called luceneweb.war. Copy this to your
{tomcat-home}/webapps directory. You may need to restart
Tomcat. </p>
</section>
<section name="Configuration">
<p>
From your Tomcat directory look in the webapps/luceneweb subdirectory. If it's not
present, try browsing to "http://localhost:8080/luceneweb" then look again.
Edit a file called configuration.jsp. Ensure that the indexLocation is equal to the
location you used for your index. You may also customize the appTitle and appfooter
strings as you see fit. Once you have finished altering the configuration you should
restart Tomcat. You may also wish to update the war file by typing
<b>jar -uf luceneweb.war configuration.jsp</b> from the luceneweb subdirectory.
(The u option is not available in all versions of jar. In this case recreate the war file).
</p>
</section>
<section name="Running the Demos">
<p>Now you're ready to roll. In your browser set the url to "http://localhost:8080/luceneweb"
enter "test" and the number of items per page and press search.</p>
<p>You should now be looking either at a number of results (provided you didn't erase the
Tomcat examples) or nothing. Try other search terms. Depending on the number of items
per page you set and results returned, there may be a link at the bottom that says "more results>>",
clicking it goes to subsequent pages. If you get an error regarding opening the index, then you
probably set the path in "configuration" incorrectly or Tomcat doesn't have permissions to the
index (or you skipped the step of creating it).</p>
</section>
<section name="About the code...">
<p>
If you want to know more about how this web app works or how to customize it then
<a href="demo4.html">read on&gt;&gt;&gt;</a>.
</p>
</section>
</body>
</document>

134
xdocs/demo4.xml Normal file
View File

@ -0,0 +1,134 @@
<?xml version="1.0"?>
<document>
<properties>
<author email="acoliver@apache.org">Andrew C. Oliver</author>
<title>Jakarta Lucene - Web Application Demo Sources Walkthrough</title>
</properties>
<body>
<section name="About the Code">
<p>
In this section we walk through the sources behind the basic Lucene Web Application demo:
where to find it, its parts, and their function. This section is intended for Java developers
wishing to understand how to use Jakarta Lucene in their applications or for those involved
in deploying web applications based on Lucene.
</p>
</section>
<section name="Location of the source (developers/deployers)">
<p>
Relative to the directory created when you extracted Lucene or retrieved it from CVS, you
should see a directory called "src" which in turn contains a directory called "jsp".
This is the root for all of the Lucene web demo.
</p>
<p>
Within this directory you should see the index.jsp file. Bring this up in vi or your
editor of choice.
</p>
</section>
<section name="index.jsp (developers/deployers)">
<p>
This jsp page is pretty boring by itself. All it does is include a header, display a form and
include a footer. If you look at the form, it has two fields: query (where you enter your
search criteria) and maxresults where you specify the number of results per page. If you look
at the form tag, you'll notice it uses the get method as opposed to the post. While this is
considered deprecated functionality by the latest w3c specs, it's unlikely to go away due to the
usefulness of being able to bookmark things like searches. By the structure of this JSP it should
be easy to customize it without even editing this particular file. You could simply change the
header and footer. Let's look at the header.jsp (located in the same directory) next.
</p>
</section>
<section name="header.jsp (developers/deployers)">
<p>
The header is also very simple by itself. The only thing it does is include the configuration.jsp
(which you looked at in the last section of this guide) and set the title and a brief header. This
would be a good place to put your own custom HTML to "pretty" things up a bit. We won't cover the
footer because all it does is display the footer and close your tags. Let's look at the results.jsp,
the meat of this application next.
</p>
</section>
<section name="results.jsp (developers)">
<p>
The results.jsp has a lot more functionality. Much of it is for paging the search results; we'll not
cover this as it's commented well enough. It does not perform any optimizations such as caching results,
etc. as that would make this a more complex example. The first thing in this page is the actual imports
for the Lucene classes and Lucene demo classes. These classes are loaded from the jars included in the
WEB-INF/lib directory in the final war file.
</p>
<p>
You'll notice that this file includes the same header and footer as the "index.jsp". From there the jsp
constructs an IndexSearcher with the "indexLocation" that was specified in the "configuration.jsp". If there
is an error of any kind in opening the index, it is displayed to the user and a boolean flag is set to tell
the rest of the sections of the jsp not to continue.
</p>
<p>
From there, this jsp attempts to get the search criteria, the start index (used for paging) and the maximum
number of results per page. If the maximum results per page is not set or not valid then it and the
start index are set to default values. If only the start index is invalid it is set to a default value. If
the criteria isn't provided then a servlet error is thrown (it is assumed that this is the result of url tampering
or some form of browser malfunction).
</p>
<p>
The jsp moves on to construct a StandardAnalyzer, just as in the simple demo, to analyze the search criteria; it
is passed to the QueryParser along with the criteria to construct a Query object. You'll also notice the
string literal "contents" included. This is to specify the search should include the contents and not
the title, url or some other field in the indexed documents. If there is any error in constructing a Query
object an error is displayed to the user.
</p>
<p>
In the next section of the jsp the IndexSearcher is asked to search given the query object. The results are
returned in a collection called "hits". If the length property of the hits collection is 0 then an error
is displayed to the user and the error flag is set.
</p>
<p>
Finally the jsp iterates through the hits collection and displays properties of the "Document" objects we talked
about in the first walkthrough. These objects contain "known" fields specific to their indexer (in this case
"IndexHTML" constructs a document with "url", "title" and "contents"). You'll notice that these results are paged
but the search is repeated every time. This is an area where optimization could improve performance for large
result sets.
</p>
</section>
<section name="More sources (developers)">
<p>
There are additional sources used by the web app that were not specifically covered by either walkthrough. For
example the HTML parser, the IndexHTML class and HTMLDocument class. These are very similar to the classes
covered in the first example, however they have properties specific to parsing and indexing HTML. This is
beyond our scope; however, by now you should feel like you're "getting started" with Lucene.
</p>
</section>
<section name="Where to go from here? (Everyone!)">
<p>
There are a number of things this demo doesn't do or doesn't do quite right. For instance, you may
have noticed that documents in the root context are unreachable (unless you reconfigure Tomcat to
support that context or redirect to it), anywhere where the directory doesn't quite match the context mapping,
you'll have a broken link in your results. If you want to index non-local files or have some other
needs this isn't supported, plus there may be security issues with running the indexing application from
your webapps directory. There are a number of things left for you the implementor or developer to do.
</p>
<p>
In time some of these things may be added to Lucene as features (if you've got a good idea we'd love to hear it!),
but for now: this is where you begin and the search engine/indexer ends. Lastly, one would assume you'd
want to follow the above advice and customize the application to look a little more fancy than black on
white with "Lucene Template" at the top. We'll see you on the Lucene Users' or Developers' mailing lists!
</p>
</section>
<section name="When to contact the Author">
<p>
Please resist the urge to contact the authors of this document (without bribes of fame and fortune attached). First
contact the <a href="http://jakarta.apache.org/site/mail.html">mailing lists</a>. That being said, feedback
and modifications to this document and samples are ever so greatly appreciated. They are just best sent to the
lists so that everyone can share in them. Certainly you'll get the most help there as well.
Thanks for understanding.
</p>
</section>
</body>
</document>

View File

@ -15,6 +15,7 @@
<menu name="Documentation">
<item name="FAQ" href="http://www.lucene.com/cgi-bin/faq/faqmanager.cgi" target="_blank"/>
<item name="Getting Started" href="/gettingstarted.html"/>
<item name="Articles" href="/resources.html"/>
<item name="Javadoc" href="/api/index.html"/>
</menu>