2002-07-14 15:05:12 -04:00
|
|
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
|
|
|
|
|
|
<!-- Content Stylesheet for Site -->
|
|
|
|
|
|
|
|
|
|
|
|
<!-- start the processing -->
|
|
|
|
<!-- ====================================================================== -->
|
2002-12-12 01:23:48 -05:00
|
|
|
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
2002-07-14 15:05:12 -04:00
|
|
|
<!-- Main Page Section -->
|
|
|
|
<!-- ====================================================================== -->
|
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
|
|
|
|
|
|
|
<meta name="author" content="Otis Gospodnetic">
|
|
|
|
<meta name="email" content="$au.getAttributeValue('email')">
|
|
|
|
|
|
|
|
|
|
|
|
|
2002-12-24 16:20:23 -05:00
|
|
|
|
2002-07-14 15:05:12 -04:00
|
|
|
<title>Jakarta Lucene - Lucene Sandbox</title>
|
|
|
|
</head>
|
|
|
|
|
|
|
|
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
|
|
|
<table border="0" width="100%" cellspacing="0">
|
|
|
|
<!-- TOP IMAGE -->
|
|
|
|
<tr>
|
|
|
|
<td align="left">
|
|
|
|
<a href="http://jakarta.apache.org"><img src="http://jakarta.apache.org/images/jakarta-logo.gif" alt="The Jakarta Project" border="0"/></a>
|
|
|
|
</td>
|
|
|
|
<td align="right">
|
|
|
|
<a href="http://jakarta.apache.org/lucene/"><img src="../images/lucene_green_300.gif" alt="Jakarta Lucene" border="0"/></a>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" width="100%" cellspacing="4">
|
|
|
|
<tr><td colspan="2">
|
|
|
|
<hr noshade="" size="1"/>
|
|
|
|
</td></tr>
|
|
|
|
|
|
|
|
<tr>
|
|
|
|
<!-- LEFT SIDE NAVIGATION -->
|
|
|
|
<td width="20%" valign="top" nowrap="true">
|
|
|
|
<p><strong>About</strong></p>
|
|
|
|
<ul>
|
|
|
|
<li> <a href="../index.html">Overview</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="../powered.html">Powered by Lucene</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="../whoweare.html">Who We Are</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/mail.html">Mailing Lists</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
<p><strong>Resources</strong></p>
|
|
|
|
<ul>
|
|
|
|
<li> <a href="http://lucene.sourceforge.net/cgi-bin/faq/faqmanager.cgi">FAQ (Official)</a>
|
|
|
|
</li>
|
2002-11-29 16:23:47 -05:00
|
|
|
<li> <a href="http://www.jguru.com/faq/Lucene">jGuru FAQ</a>
|
2002-07-14 15:05:12 -04:00
|
|
|
</li>
|
|
|
|
<li> <a href="../gettingstarted.html">Getting Started</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="../queryparsersyntax.html">Query Syntax</a>
|
2002-10-29 23:14:11 -05:00
|
|
|
</li>
|
|
|
|
<li> <a href="../fileformats.html">File Formats</a>
|
2002-07-14 15:05:12 -04:00
|
|
|
</li>
|
|
|
|
<li> <a href="../api/index.html">Javadoc</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="../contributions.html">Contributions</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="../resources.html">Articles, etc.</a>
|
2002-12-04 00:56:33 -05:00
|
|
|
</li>
|
|
|
|
<li> <a href="../benchmarks.html">Benchmarks</a>
|
2002-11-29 16:23:47 -05:00
|
|
|
</li>
|
2003-01-04 12:19:17 -05:00
|
|
|
<li> <a href="http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Lucene&amp;short_desc=%5BPATCH%5D&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;cmdtype=doit&amp;order=%27Importance%27">Patches</a>
|
2002-11-07 15:18:09 -05:00
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/bugs.html">Bugs</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Lucene&amp;short_desc=&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;cmdtype=doit&amp;order=%27Importance%27">Lucene Bugs</a>
|
2002-11-29 16:23:47 -05:00
|
|
|
</li>
|
|
|
|
<li> <a href="http://nagoya.apache.org/eyebrowse/SummarizeList?listId=30">Lucene-user</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://nagoya.apache.org/eyebrowse/SummarizeList?listId=29">Lucene-dev</a>
|
2002-12-04 00:56:33 -05:00
|
|
|
</li>
|
|
|
|
<li> <a href="../lucene-sandbox/">Lucene Sandbox</a>
|
2002-07-14 15:05:12 -04:00
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
<p><strong>Download</strong></p>
|
|
|
|
<ul>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/binindex.html">Binaries</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/sourceindex.html">Source Code</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/cvsindex.html">CVS Repositories</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
<p><strong>Jakarta</strong></p>
|
|
|
|
<ul>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/getinvolved.html">Get Involved</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/acknowledgements.html">Acknowledgements</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/contact.html">Contact</a>
|
|
|
|
</li>
|
|
|
|
<li> <a href="http://jakarta.apache.org/site/legal.html">Legal</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</td>
|
|
|
|
<td width="80%" align="left" valign="top">
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#525D76">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="Lucene Sandbox"><strong>Lucene Sandbox</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
2002-10-29 23:14:11 -05:00
|
|
|
<p>
|
|
|
|
The Lucene project also contains a workspace, Lucene Sandbox, that is open to all Lucene committers, as well
|
|
|
|
as a few other developers. The purpose of the Sandbox is to host various third party contributions,
|
|
|
|
and to serve as a place to try out new ideas and prepare them for inclusion into the core Lucene
|
|
|
|
distribution.<br />
|
|
|
|
Users are free to experiment with the components developed in the Sandbox, but Sandbox components will
|
|
|
|
not necessarily be maintained, particularly in their current state.
|
|
|
|
</p>
|
|
|
|
<p>
|
|
|
|
You can access the Lucene Sandbox CVS repository at
|
|
|
|
<a href="http://cvs.apache.org/viewcvs/jakarta-lucene-sandbox/">http://cvs.apache.org/viewcvs/jakarta-lucene-sandbox/</a>.
|
|
|
|
</p>
|
2002-09-17 00:21:06 -04:00
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
2002-10-29 23:14:11 -05:00
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="LARM"><strong>LARM</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
|
|
|
LARM is a web crawler optimized for large intranets with up to a couple of hundred hosts.
|
|
|
|
</p>
|
|
|
|
<a href="larm/overview.html">Technical Overview</a>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
2002-12-24 16:20:23 -05:00
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="Snowball Stemmers for Lucene"><strong>Snowball Stemmers for Lucene</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
|
|
|
This project provides pre-compiled versions of the Snowball stemmers
|
|
|
|
for Lucene.
|
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
More information can be found
|
2002-12-24 16:20:23 -05:00
|
|
|
<a href="http://jakarta.apache.org/lucene/docs/lucene-sandbox/snowball/">here</a>.
|
2003-01-28 17:54:23 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
|
|
|
<a href="http://snowball.tartarus.org/">Background information on Snowball</a>,
|
|
|
|
which is a language for stemmers developed by Martin Porter.
|
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="Ant"><strong>Ant</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
|
|
|
The Ant project is a useful Ant task that creates a Lucene index out of an Ant fileset. It also
|
|
|
|
contains an example HTML parser that uses JTidy.
|
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
<a href="http://cvs.apache.org/viewcvs/jakarta-lucene-sandbox/contributions/ant/">The
|
2003-01-28 17:54:23 -05:00
|
|
|
CVS repository for the Ant contribution.</a>
|
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="SearchBean"><strong>SearchBean</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
|
|
|
SearchBean is a UI component that can be used to browse through the results of a Lucene search.
|
|
|
|
The SearchBean searches the index for a given query string, retrieves the hits, and then brings
|
|
|
|
them into the HitsIterator class, which can be used for paging and sorting through search results.
|
|
|
|
|
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
<a href="http://cvs.apache.org/viewcvs/jakarta-lucene-sandbox/contributions/searchBean/">The
|
2003-01-28 17:54:23 -05:00
|
|
|
CVS repository for the SearchBean contribution.</a>
|
|
|
|
</p>
|
|
|
|
<p>
|
|
|
|
<a href="http://snowball.tartarus.org/">Background information on Snowball</a>,
|
|
|
|
which is a language for stemmers developed by Martin Porter.
|
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
2003-05-11 21:49:47 -04:00
|
|
|
<a name="WordNet/Synonyms"><strong>WordNet/Synonyms</strong></a>
|
2003-01-28 17:54:23 -05:00
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
The Lucene WordNet code consists of a single class which parses a prolog file
|
|
|
|
from the WordNet site that contains a list of English words and synonyms.
|
|
|
|
The class builds a Lucene index from the synonyms file. Your querying code could
|
|
|
|
hit this index to build up a set of synonyms for the terms in the
|
|
|
|
search query.
|
2003-01-28 17:54:23 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
More information on the <a href="http://www.tropo.com/techno/java/lucene/wordnet.html">Lucene WordNet package</a>.
|
|
|
|
<a href="http://www.cogsci.princeton.edu/~wn/">WordNet</a> is an online database of English language words that contains
|
|
|
|
synonyms, definitions, and various relationships between synonym sets.
|
2003-01-31 14:42:30 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
<a href="http://cvs.apache.org/viewcvs.cgi/jakarta-lucene-sandbox/contributions/WordNet/">
|
|
|
|
CVS for the WordNet module.</a>
|
2003-01-28 17:54:23 -05:00
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
2003-05-11 21:49:47 -04:00
|
|
|
<a name="SAX/DOM XML Indexing demo"><strong>SAX/DOM XML Indexing demo</strong></a>
|
2003-01-28 17:54:23 -05:00
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
This contribution is some sample code that demonstrates adding simple XML documents into the index. It creates
|
|
|
|
a new Document object for each file, and then populates the Document with a Field for each XML element, recursively.
|
|
|
|
There are examples included for both SAX and DOM.
|
2003-01-31 14:42:30 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
|
|
|
|
<a href="http://cvs.apache.org/viewcvs.cgi/jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/">
|
|
|
|
CVS for the XML Indexing Demo.</a>
|
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="Javascript Query Constructor"><strong>Javascript Query Constructor</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
|
|
|
Javascript library to support client-side query-building. Provides support for a user interface similar to
|
|
|
|
<a href="http://www.google.com.sg/advanced_search">Google's Advanced Search</a>.
|
2003-01-31 14:42:30 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
|
|
|
|
<a href="http://cvs.apache.org/viewcvs.cgi/jakarta-lucene-sandbox/contributions/javascript/queryConstructor/">
|
|
|
|
CVS for the files.</a>
|
2003-01-31 14:42:30 -05:00
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
2003-05-11 21:49:47 -04:00
|
|
|
<a name="Javascript Query Validator"><strong>Javascript Query Validator</strong></a>
|
2003-01-31 14:42:30 -05:00
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
2003-05-11 21:49:47 -04:00
|
|
|
Javascript library to support client-side query validation. Lucene doesn't like malformed queries and tends to
|
|
|
|
throw ParseExceptions, which are often difficult to interpret and pass on to the user. This library hopes to
|
|
|
|
alleviate that problem.
|
2003-01-31 14:42:30 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
2003-01-28 17:54:23 -05:00
|
|
|
|
2003-05-11 21:49:47 -04:00
|
|
|
<a href="http://cvs.apache.org/viewcvs.cgi/jakarta-lucene-sandbox/contributions/javascript/queryValidator/">
|
|
|
|
CVS for the files.</a>
|
2003-01-31 14:42:30 -05:00
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
|
|
|
<tr><td bgcolor="#828DA6">
|
|
|
|
<font color="#ffffff" face="arial,helvetica,sanserif">
|
|
|
|
<a name="High Frequency Terms"><strong>High Frequency Terms</strong></a>
|
|
|
|
</font>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td>
|
|
|
|
<blockquote>
|
|
|
|
<p>
|
|
|
|
The miscellaneous package is for classes that don't fit anywhere else. The only class in it right now determines
|
|
|
|
what terms occur the most inside a Lucene index. This could be useful for analyzing which terms may need to go
|
|
|
|
into a custom stop word list for better search results.
|
2003-01-28 17:54:23 -05:00
|
|
|
</p>
|
|
|
|
<p>
|
2003-01-31 14:42:30 -05:00
|
|
|
|
2003-05-11 21:49:47 -04:00
|
|
|
<a href="http://cvs.apache.org/viewcvs.cgi/jakarta-lucene-sandbox/contributions/miscellaneous/src/java/org/apache/lucene/misc/">
|
2003-01-31 14:42:30 -05:00
|
|
|
CVS for miscellaneous classes.</a>
|
2002-12-24 16:20:23 -05:00
|
|
|
</p>
|
|
|
|
</blockquote>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
2002-09-17 00:21:06 -04:00
|
|
|
</table>
|
2002-07-14 15:05:12 -04:00
|
|
|
</blockquote>
|
|
|
|
|
|
|
|
</td></tr>
|
|
|
|
<tr><td><br/></td></tr>
|
|
|
|
</table>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
|
|
|
|
<!-- FOOTER -->
|
|
|
|
<tr><td colspan="2">
|
|
|
|
<hr noshade="" size="1"/>
|
|
|
|
</td></tr>
|
|
|
|
<tr><td colspan="2">
|
|
|
|
<div align="center"><font color="#525D76" size="-1"><em>
|
2003-03-06 14:30:52 -05:00
|
|
|
Copyright © 1999-2003, Apache Software Foundation
|
2002-07-14 15:05:12 -04:00
|
|
|
</em></font></div>
|
|
|
|
</td></tr>
|
|
|
|
</table>
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
<!-- end the processing -->
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|