Updated the website to new Forrest based site, see Issue 707, part one of commits
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@479465 13f79535-47bb-0310-9956-ffa450edef68
|
@ -1,726 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Kelvin Tan">
|
||||
<meta name="email" content="kelvint@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Resources - Performance Benchmarks</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="Performance Benchmarks"><strong>Performance Benchmarks</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The purpose of these user-submitted performance figures is to
|
||||
give current and potential users of Lucene a sense
|
||||
of how well Lucene scales. If the requirements for an upcoming
|
||||
project are similar to an existing benchmark, you
|
||||
will also have something to work with when designing the system
|
||||
architecture for the application.
|
||||
</p>
|
||||
<p>
|
||||
If you've conducted performance tests with Lucene, we'd
|
||||
appreciate it if you could submit these figures for display
|
||||
on this page. Post these figures to the lucene-user mailing list
|
||||
using this
|
||||
<a href="benchmarktemplate.xml">template</a>.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="Benchmark Variables"><strong>Benchmark Variables</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br />
|
||||
<li><i>Dedicated machine for indexing</i>: Self-explanatory
|
||||
(yes/no)</li>
|
||||
<li><i>CPU</i>: Self-explanatory (Type, Speed and Quantity)</li>
|
||||
<li><i>RAM</i>: Self-explanatory</li>
|
||||
<li><i>Drive configuration</i>: Self-explanatory (IDE, SCSI,
|
||||
RAID-1, RAID-5)</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Software environment</b><br />
|
||||
<li><i>Lucene Version</i>: Self-explanatory</li>
|
||||
<li><i>Java Version</i>: Version of Java SDK/JRE that is run
|
||||
</li>
|
||||
<li><i>Java VM</i>: Server/client VM, Sun VM/JRockIt</li>
|
||||
<li><i>OS Version</i>: Self-explanatory</li>
|
||||
<li><i>Location of index</i>: Is the index stored in filesystem
|
||||
or database? Is it on the same server (local) or
|
||||
over the network?</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Lucene indexing variables</b><br />
|
||||
<li><i>Number of source documents</i>: Number of documents being
|
||||
indexed</li>
|
||||
<li><i>Total filesize of source documents</i>:
|
||||
Self-explanatory</li>
|
||||
<li><i>Average filesize of source documents</i>:
|
||||
Self-explanatory</li>
|
||||
<li><i>Source documents storage location</i>: Where are the
|
||||
documents being indexed located?
|
||||
Filesystem, DB, http, etc.</li>
|
||||
<li><i>File type of source documents</i>: Types of files being
|
||||
indexed, e.g. HTML files, XML files, PDF files, etc.</li>
|
||||
<li><i>Parser(s) used, if any</i>: Parsers used for parsing the
|
||||
various files for indexing,
|
||||
e.g. XML parser, HTML parser, etc.</li>
|
||||
<li><i>Analyzer(s) used</i>: Type of Lucene analyzer used</li>
|
||||
<li><i>Number of fields per document</i>: Number of Fields each
|
||||
Document contains</li>
|
||||
<li><i>Type of fields</i>: Type of each field</li>
|
||||
<li><i>Index persistence</i>: Where the index is stored, e.g.
|
||||
FSDirectory, SqlDirectory, etc.</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Figures</b><br />
|
||||
<li><i>Time taken (in ms/s as an average of at least 3 indexing
|
||||
runs)</i>: Time taken to index all files</li>
|
||||
<li><i>Time taken / 1000 docs indexed</i>: Time taken to index
|
||||
1000 files</li>
|
||||
<li><i>Memory consumption</i>: Self-explanatory</li>
|
||||
<li><i>Query speed</i>: average time a query takes, type
|
||||
of queries (e.g. simple one-term query, phrase query),
|
||||
not measuring any overhead outside Lucene</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Notes</b><br />
|
||||
<li><i>Notes</i>: Any comments which don't belong in the above,
|
||||
special tuning/strategies, etc.</li>
|
||||
</p>
|
||||
</ul>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="User-submitted Benchmarks"><strong>User-submitted Benchmarks</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
These benchmarks have been kindly submitted by Lucene users for
|
||||
reference purposes.
|
||||
</p>
|
||||
<p><b>We make NO guarantees regarding their accuracy or
|
||||
validity.</b>
|
||||
</p>
|
||||
<p>We strongly recommend you conduct your own
|
||||
performance benchmarks before deciding on a particular
|
||||
hardware/software setup (and hopefully submit
|
||||
these figures to us).
|
||||
</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="Hamish Carpenter's benchmarks"><strong>Hamish Carpenter's benchmarks</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br />
|
||||
<li><i>Dedicated machine for indexing</i>: yes</li>
|
||||
<li><i>CPU</i>: Intel x86 P4 1.5GHz</li>
|
||||
<li><i>RAM</i>: 512 DDR</li>
|
||||
<li><i>Drive configuration</i>: IDE 7200rpm Raid-1</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Software environment</b><br />
|
||||
<li><i>Lucene Version</i>: 1.3</li>
|
||||
<li><i>Java Version</i>: 1.3.1 IBM JITC Enabled</li>
|
||||
<li><i>Java VM</i>: </li>
|
||||
<li><i>OS Version</i>: Debian Linux 2.4.18-686</li>
|
||||
<li><i>Location of index</i>: local</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Lucene indexing variables</b><br />
|
||||
<li><i>Number of source documents</i>: Random generator. Set
|
||||
to make 1M documents
|
||||
in 2x500,000 batches.</li>
|
||||
<li><i>Total filesize of source documents</i>: > 1GB if
|
||||
stored</li>
|
||||
<li><i>Average filesize of source documents</i>: 1KB</li>
|
||||
<li><i>Source documents storage location</i>: Filesystem</li>
|
||||
<li><i>File type of source documents</i>: Generated</li>
|
||||
<li><i>Parser(s) used, if any</i>: </li>
|
||||
<li><i>Analyzer(s) used</i>: Default</li>
|
||||
<li><i>Number of fields per document</i>: 11</li>
|
||||
<li><i>Type of fields</i>: 1 date, 1 id, 9 text</li>
|
||||
<li><i>Index persistence</i>: FSDirectory</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Figures</b><br />
|
||||
<li><i>Time taken (in ms/s as an average of at least 3
|
||||
indexing runs)</i>: </li>
|
||||
<li><i>Time taken / 1000 docs indexed</i>: 49 seconds</li>
|
||||
<li><i>Memory consumption</i>:</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Notes</b><br />
|
||||
<p>
|
||||
A Windows client ran a random document generator which
|
||||
created
|
||||
documents based on some arrays of values and an excerpt
|
||||
(approx 1kb)
|
||||
from a text file of the Bible (King James version).<br />
|
||||
These were submitted via a socket connection (open throughout
|
||||
indexing process).<br />
|
||||
The index writer was not closed between index calls.<br />
|
||||
This created a 400MB index in 23 files (after
|
||||
optimization).<br />
|
||||
</p>
|
||||
<p>
|
||||
<u>Query details</u>:<br />
|
||||
</p>
|
||||
<p>
|
||||
Set up a threaded class to start x number of simultaneous
|
||||
threads to
|
||||
search the above created index.
|
||||
</p>
|
||||
<p>
|
||||
Query: +Domain:sos +(+((Name:goo*^2.0 Name:plan*^2.0)
|
||||
(Teaser:goo* Teaser:plan*)
|
||||
(Details:goo* Details:plan*)) -Cancel:y)
|
||||
+DisplayStartDate:[mkwsw2jk0-mq3dj1uq0]
|
||||
+EndDate:[mq3dj1uq0-ntlxuggw0]
|
||||
</p>
|
||||
<p>
|
||||
This query counted 34000 documents and I limited the returned
|
||||
documents
|
||||
to 5.
|
||||
</p>
|
||||
<p>
|
||||
This is using Peter Halacsy's IndexSearcherCache slightly
|
||||
modified to
|
||||
be a singleton that returns cached searchers for a given
|
||||
directory. This
|
||||
solved an initial problem with too many files open and
|
||||
running out of
|
||||
Linux handles for them.
|
||||
</p>
|
||||
<pre>
|
||||
Threads|Avg Time per query (ms)
|
||||
1 1009ms
|
||||
2 2043ms
|
||||
3 3087ms
|
||||
4 4045ms
|
||||
.. .
|
||||
.. .
|
||||
10 10091ms
|
||||
</pre>
|
||||
<p>
|
||||
I removed the two date range terms from the query and it made
|
||||
a HUGE
|
||||
difference in performance. With 4 threads the avg time
|
||||
dropped to 900ms!
|
||||
</p>
|
||||
<p>Other query optimizations made little difference.</p>
|
||||
</p>
|
||||
</ul>
|
||||
<p>
|
||||
Hamish can be contacted at hamish at catalyst.net.nz.
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="Justin Greene's benchmarks"><strong>Justin Greene's benchmarks</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br />
|
||||
<li><i>Dedicated machine for indexing</i>: No, but nominal
|
||||
usage at time of indexing.</li>
|
||||
<li><i>CPU</i>: Compaq Proliant 1850R/600 2 X pIII 600</li>
|
||||
<li><i>RAM</i>: 1GB, 256MB allocated to JVM.</li>
|
||||
<li><i>Drive configuration</i>: RAID 5 on Fibre Channel
|
||||
Array</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Software environment</b><br />
|
||||
<li><i>Java Version</i>: 1.3.1_06</li>
|
||||
<li><i>Java VM</i>: </li>
|
||||
<li><i>OS Version</i>: Winnt 4/Sp6</li>
|
||||
<li><i>Location of index</i>: local</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Lucene indexing variables</b><br />
|
||||
<li><i>Number of source documents</i>: about 60K</li>
|
||||
<li><i>Total filesize of source documents</i>: 6.5GB</li>
|
||||
<li><i>Average filesize of source documents</i>: 100K
|
||||
(6.5GB/60K documents)</li>
|
||||
<li><i>Source documents storage location</i>: filesystem on
|
||||
NTFS</li>
|
||||
<li><i>File type of source documents</i>: </li>
|
||||
<li><i>Parser(s) used, if any</i>: Currently the only parser
|
||||
used is the Quiotix html
|
||||
parser.</li>
|
||||
<li><i>Analyzer(s) used</i>: SimpleAnalyzer</li>
|
||||
<li><i>Number of fields per document</i>: 8</li>
|
||||
<li><i>Type of fields</i>: All strings, and all are stored
|
||||
and indexed.</li>
|
||||
<li><i>Index persistence</i>: FSDirectory</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Figures</b><br />
|
||||
<li><i>Time taken (in ms/s as an average of at least 3
|
||||
indexing runs)</i>: 1 hour 12 minutes, 1 hour 14 minutes and 1 hour 17
|
||||
minutes. Note that the #
|
||||
and size of documents change daily.</li>
|
||||
<li><i>Time taken / 1000 docs indexed</i>: </li>
|
||||
<li><i>Memory consumption</i>: JVM is given 256MB and uses it
|
||||
all.</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Notes</b><br />
|
||||
<p>
|
||||
We have 10 threads reading files from the filesystem and
|
||||
parsing and
|
||||
analyzing them and then pushing them onto a queue and a single
|
||||
thread popping
|
||||
them from the queue and indexing. Note that we are indexing
|
||||
email messages
|
||||
and are storing the entire plaintext of the message in the
|
||||
index. If the
|
||||
message contains an attachment and we do not have a filter for
|
||||
the attachment
|
||||
(i.e. we do not do PDFs yet), we discard the data.
|
||||
</p>
|
||||
</p>
|
||||
</ul>
|
||||
<p>
|
||||
Justin can be contacted at tvxh-lw4x at spamex.com.
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="Daniel Armbrust's benchmarks"><strong>Daniel Armbrust's benchmarks</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
My disclaimer is that this is a very poor "Benchmark". It was not done for raw speed,
|
||||
nor was the total index built in one shot. The index was created on several different
|
||||
machines (all with these specs, or very similar), with each machine indexing batches of 500,000 to
|
||||
1 million documents per batch. Each of these small indexes was then moved to a
|
||||
much larger drive, where they were all merged together into a big index.
|
||||
This process was done manually, over the course of several months, as the sources became available.
|
||||
</p>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br />
|
||||
<li><i>Dedicated machine for indexing</i>: no - The machine had moderate to low load. However, the indexing process was built single
|
||||
threaded, so it only took advantage of 1 of the processors. It usually got 100% of this processor.</li>
|
||||
<li><i>CPU</i>: Sun Ultra 80 4 x 64 bit processors</li>
|
||||
<li><i>RAM</i>: 4 GB Memory</li>
|
||||
<li><i>Drive configuration</i>: Ultra-SCSI Wide 10000 RPM 36GB Drive</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Software environment</b><br />
|
||||
<li><i>Lucene Version</i>: 1.2</li>
|
||||
<li><i>Java Version</i>: 1.3.1</li>
|
||||
<li><i>Java VM</i>: </li>
|
||||
<li><i>OS Version</i>: Sun 5.8 (64 bit)</li>
|
||||
<li><i>Location of index</i>: local</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Lucene indexing variables</b><br />
|
||||
<li><i>Number of source documents</i>: 13,820,517</li>
|
||||
<li><i>Total filesize of source documents</i>: 87.3 GB</li>
|
||||
<li><i>Average filesize of source documents</i>: 6.3 KB</li>
|
||||
<li><i>Source documents storage location</i>: Filesystem</li>
|
||||
<li><i>File type of source documents</i>: XML</li>
|
||||
<li><i>Parser(s) used, if any</i>: </li>
|
||||
<li><i>Analyzer(s) used</i>: A home grown analyzer that simply removes stopwords.</li>
|
||||
<li><i>Number of fields per document</i>: 1 - 31</li>
|
||||
<li><i>Type of fields</i>: All text, though 2 of them are dates (20001205) that we filter on</li>
|
||||
<li><i>Index persistence</i>: FSDirectory</li>
|
||||
<li><i>Index size</i>: 12.5 GB</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Figures</b><br />
|
||||
<li><i>Time taken (in ms/s as an average of at least 3
|
||||
indexing runs)</i>: For 617271 documents, 209698 seconds (or ~2.5 days)</li>
|
||||
<li><i>Time taken / 1000 docs indexed</i>: 340 Seconds</li>
|
||||
<li><i>Memory consumption</i>: (java executed with) java -Xmx1000m -Xss8192k so
|
||||
1 GB of memory was allotted to the indexer</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Notes</b><br />
|
||||
<p>
|
||||
The source documents were XML. The "indexer" opened each document one at a time, ran an
|
||||
XSL transformation on them, and then proceeded to index the stream. The indexer optimized
|
||||
the index every 50,000 documents (on this run) though previously, we optimized every
|
||||
300,000 documents. The performance didn't change much either way. We did no other
|
||||
tuning (RAM Directories, separate process to pretransform the source material, etc.)
|
||||
to make it index faster. When all of these individual indexes were built, they were
|
||||
merged together into the main index. That process usually took ~ a day.
|
||||
</p>
|
||||
</p>
|
||||
</ul>
|
||||
<p>
|
||||
Daniel can be contacted at Armbrust.Daniel at mayo.edu.
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sans-serif">
|
||||
<a name="Geoffrey Peddle's benchmarks"><strong>Geoffrey Peddle's benchmarks</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
I'm doing a technical evaluation of search engines
|
||||
for Ariba, an enterprise application software company.
|
||||
I compared Lucene to a commercial C language based
|
||||
search engine which I'll refer to as vendor A.
|
||||
Overall Lucene's performance was similar to vendor A
|
||||
and met our application's requirements. I've
|
||||
summarized our results below.
|
||||
</p>
|
||||
<p>
|
||||
Search scalability:<br />
|
||||
We ran a set of 16 queries in a single thread for 20
|
||||
iterations. We report below the times for the last 15
|
||||
iterations (i.e. after the system was warmed up). The
|
||||
4 sets of results below are for indexes with between
|
||||
50,000 and 600,000 documents. Although the
|
||||
times for Lucene grew faster with document count than
|
||||
vendor A they were comparable.
|
||||
</p>
|
||||
<pre>
|
||||
50K documents
|
||||
Lucene 5.2 seconds
|
||||
A 7.2
|
||||
200K
|
||||
Lucene 15.3
|
||||
A 15.2
|
||||
400K
|
||||
Lucene 28.2
|
||||
A 25.5
|
||||
600K
|
||||
Lucene 41
|
||||
A 33
|
||||
</pre>
|
||||
<p>
|
||||
Individual Query times:<br />
|
||||
Total query times are very similar between the 2
|
||||
systems but there were larger differences when you
|
||||
looked at individual queries.
|
||||
</p>
|
||||
<p>
|
||||
For simple queries with small result sets Vendor A was
|
||||
consistently faster than Lucene. For example a
|
||||
single query might take vendor A 32 thousandths of a
|
||||
second and Lucene 64 thousandths of a second. Both
|
||||
times are however well within acceptable response
|
||||
times for our application.
|
||||
</p>
|
||||
<p>
|
||||
For simple queries with large result sets Vendor A was
|
||||
consistently slower than Lucene. For example a
|
||||
single query might take vendor A 300 thousandths of a
|
||||
second and Lucene 200 thousandths of a second.
|
||||
For more complex queries of the form (term1 or term2
|
||||
or term3) AND (term4 or term5 or term6) AND (term7 or
|
||||
term8) the results were more divergent. For
|
||||
queries with small result sets Vendor A generally had
|
||||
very short response times and sometimes Lucene had
|
||||
significantly larger response times. For example
|
||||
Vendor A might take 16 thousandths of a second and
|
||||
Lucene might take 156. I do not consider it to be
|
||||
the case that Lucene's response time grew unexpectedly
|
||||
but rather that Vendor A appeared to be taking
|
||||
advantage of an optimization which Lucene didn't have.
|
||||
(I believe there have been discussions on the dev
|
||||
mailing list on complex queries of this sort.)
|
||||
</p>
|
||||
<p>
|
||||
Index Size:<br />
|
||||
For our test data the size of both indexes grew
|
||||
linearly with the number of documents. Note that
|
||||
these sizes are compact sizes, not maximum size during
|
||||
index loading. The numbers below are from running du
|
||||
-k in the directory containing the index data. The
|
||||
larger numbers below for Vendor A may be because it
|
||||
supports additional functionality not available in
|
||||
Lucene. I think it's the constant rate of growth
|
||||
rather than the absolute amount which is more
|
||||
important.
|
||||
</p>
|
||||
<pre>
|
||||
50K documents
|
||||
Lucene 45516 K
|
||||
A 63921
|
||||
200K
|
||||
Lucene 171565
|
||||
A 228370
|
||||
400K
|
||||
Lucene 345717
|
||||
A 457843
|
||||
600K
|
||||
Lucene 511338
|
||||
A 684913
|
||||
</pre>
|
||||
<p>
|
||||
Indexing Times:<br />
|
||||
These times are for reading the documents from our
|
||||
database, processing them, inserting them into the
|
||||
document search product and index compacting. Our
|
||||
data has a large number of fields/attributes. For
|
||||
this test I restricted Lucene to 24 attributes to
|
||||
reduce the number of files created. Doing this I was
|
||||
able to specify a merge width for Lucene of 60. I
|
||||
found in general that Lucene indexing performance tends to
|
||||
be very sensitive to changes in the merge width.
|
||||
Note also that our application does a full compaction
|
||||
after inserting every 20,000 documents. These times
|
||||
are just within our acceptable limits but we are
|
||||
interested in alternatives to increase Lucene's
|
||||
performance in this area.
|
||||
</p>
|
||||
<p>
|
||||
<pre>
|
||||
600K documents
|
||||
Lucene 81 minutes
|
||||
A 34 minutes
|
||||
</pre>
|
||||
</p>
|
||||
<p>
|
||||
(I don't have accurate results for all sizes on this
|
||||
measure but believe that the indexing time for both
|
||||
solutions grew essentially linearly with size. The
|
||||
time to compact the index generally grew with index
|
||||
size but it's a small percent of overall time at these
|
||||
sizes.)
|
||||
</p>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br />
|
||||
<li><i>Dedicated machine for indexing</i>: yes</li>
|
||||
<li><i>CPU</i>: Dell Pentium 4 CPU 2.00GHz, 1cpu</li>
|
||||
<li><i>RAM</i>: 1 GB Memory</li>
|
||||
<li><i>Drive configuration</i>: Fujitsu MAM3367MP SCSI </li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Software environment</b><br />
|
||||
<li><i>Java Version</i>: 1.4.2_02</li>
|
||||
<li><i>Java VM</i>: JDK</li>
|
||||
<li><i>OS Version</i>: Windows XP </li>
|
||||
<li><i>Location of index</i>: local</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Lucene indexing variables</b><br />
|
||||
<li><i>Number of source documents</i>: 600,000</li>
|
||||
<li><i>Total filesize of source documents</i>: from database</li>
|
||||
<li><i>Average filesize of source documents</i>: from database</li>
|
||||
<li><i>Source documents storage location</i>: from database</li>
|
||||
<li><i>File type of source documents</i>: XML</li>
|
||||
<li><i>Parser(s) used, if any</i>: </li>
|
||||
<li><i>Analyzer(s) used</i>: small variation on WhitespaceAnalyzer</li>
|
||||
<li><i>Number of fields per document</i>: 24</li>
|
||||
<li><i>Type of fields</i>: 1 keyword, 1 big unindexed, rest are unstored and a mix of tokenized/untokenized</li>
|
||||
<li><i>Index persistence</i>: FSDirectory</li>
|
||||
<li><i>Index size</i>: 12.5 GB</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Figures</b><br />
|
||||
<li><i>Time taken (in ms/s as an average of at least 3
|
||||
indexing runs)</i>: 600,000 documents in 81 minutes (du -k = 511338)</li>
|
||||
<li><i>Time taken / 1000 docs indexed</i>: 123 documents/second</li>
|
||||
<li><i>Memory consumption</i>: -ms256m -mx512m -Xss4m -XX:MaxPermSize=512M</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Notes</b><br />
|
||||
<p>
|
||||
<li>merge width of 60</li>
|
||||
<li>did a compact every 20,000 documents</li>
|
||||
</p>
|
||||
</p>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
<benchmark>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br/>
|
||||
<li><i>Dedicated machine for indexing</i>: Self-explanatory
|
||||
(yes/no)</li>
|
||||
<li><i>CPU</i>: Self-explanatory (Type, Speed and Quantity)</li>
|
||||
<li><i>RAM</i>: Self-explanatory</li>
|
||||
<li><i>Drive configuration</i>: Self-explanatory (IDE, SCSI, RAID-1,
|
||||
RAID-5)</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Software environment</b><br/>
|
||||
<li><i>Lucene Version</i>: Self-explanatory</li>
|
||||
<li><i>Java Version</i>: Version of Java SDK/JRE that is run </li>
|
||||
<li><i>Java VM</i>: Server/client VM, Sun VM/JRockIt</li>
|
||||
<li><i>OS Version</i>: Self-explanatory</li>
|
||||
<li><i>Location of index</i>: Is the index stored in filesystem or
|
||||
database? Is it on the same server (local) or
|
||||
over the network?</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Lucene indexing variables</b><br/>
|
||||
<li><i>Number of source documents</i>: Number of documents being
|
||||
indexed</li>
|
||||
<li><i>Total filesize of source documents</i>: Self-explanatory</li>
|
||||
<li><i>Average filesize of source documents</i>:
|
||||
Self-explanatory</li>
|
||||
<li><i>Source documents storage location</i>: Where are the documents
|
||||
being indexed located?
|
||||
Filesystem, DB, http, etc.</li>
|
||||
<li><i>File type of source documents</i>: Types of files being
|
||||
indexed, e.g. HTML files, XML files, PDF files, etc.</li>
|
||||
<li><i>Parser(s) used, if any</i>: Parsers used for parsing the
|
||||
various files for indexing,
|
||||
e.g. XML parser, HTML parser, etc.</li>
|
||||
<li><i>Analyzer(s) used</i>: Type of Lucene analyzer used</li>
|
||||
<li><i>Number of fields per document</i>: Number of Fields each
|
||||
Document contains</li>
|
||||
<li><i>Type of fields</i>: Type of each field</li>
|
||||
<li><i>Index persistence</i>: Where the index is stored, e.g.
|
||||
FSDirectory, SqlDirectory, etc.</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Figures</b><br/>
|
||||
<li><i>Time taken (in ms/s as an average of at least 3 indexing
|
||||
runs)</i>: Time taken to index all files</li>
|
||||
<li><i>Time taken / 1000 docs indexed</i>: Time taken to index 1000
|
||||
files</li>
|
||||
<li><i>Memory consumption</i>: Self-explanatory</li>
|
||||
<li><i>Query speed</i>: average time a query takes, type
|
||||
of queries (e.g. simple one-term query, phrase query),
|
||||
not measuring any overhead outside Lucene</li>
|
||||
</p>
|
||||
<p>
|
||||
<b>Notes</b><br/>
|
||||
<li><i>Notes</i>: Any comments which don't belong in the above,
|
||||
special tuning/strategies, etc.</li>
|
||||
</p>
|
||||
</ul>
|
||||
</benchmark>
|
|
@ -1,802 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="
|
||||
Peter Carlson
|
||||
">
|
||||
<meta name="email" content="carlson@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene -
|
||||
Contributions - Apache Lucene
|
||||
</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Overview"><strong>Overview</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>This page lists external Lucene resources. If you have
|
||||
written something that should be included, please post all
|
||||
relevant information to one of the mailing lists. Nothing
|
||||
listed here is directly supported by the Lucene
|
||||
developers, so if you encounter any problems with any of
|
||||
this software, please use the author's contact information
|
||||
to get help.</p>
|
||||
<p>If you are looking for information on contributing patches or other improvements to Lucene, see
|
||||
<a href="http://wiki.apache.org/jakarta-lucene/HowToContribute">How To Contribute</a> on the Lucene Wiki.</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Lucene Tools"><strong>Lucene Tools</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Software that works with Lucene indices.
|
||||
</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Luke"><strong>Luke</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://www.getopt.org/luke/">
|
||||
http://www.getopt.org/luke/
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Andrzej Bialecki
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="LIMO (Lucene Index Monitor)"><strong>LIMO (Lucene Index Monitor)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://limo.sf.net/">
|
||||
http://limo.sf.net/
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Julien Nioche
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Lucene Document Converters"><strong>Lucene Document Converters</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Lucene requires information you want to index to be
|
||||
converted into a Document class. Here are
|
||||
contributions for various solutions that convert different
|
||||
content types to Lucene's Document classes.
|
||||
</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="XML Document #1"><strong>XML Document #1</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://marc.theaimsgroup.com/?l=lucene-dev&amp;m=100723333506246&amp;w=2">
|
||||
http://marc.theaimsgroup.com/?l=lucene-dev&amp;m=100723333506246&amp;w=2
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Philip Ogren - ogren@mayo.edu
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="XML Document #2"><strong>XML Document #2</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://www.mail-archive.com/lucene-user@jakarta.apache.org/msg00346.html">
|
||||
http://www.mail-archive.com/lucene-user@jakarta.apache.org/msg00346.html
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Peter Carlson - carlson@bookandhammer.com
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="PDF Box"><strong>PDF Box</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://www.pdfbox.org/">
|
||||
http://www.pdfbox.org/
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Ben Litchfield - ben@csh.rit.edu
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="XPDF - PDF Document Conversion"><strong>XPDF - PDF Document Conversion</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://www.foolabs.com/xpdf">
|
||||
http://www.foolabs.com/xpdf
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
N/A
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="PDFTextStream -- PDF text and metadata extraction"><strong>PDFTextStream -- PDF text and metadata extraction</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://snowtide.com">
|
||||
http://snowtide.com
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
N/A
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="PJ Classic &amp; PJ Professional - PDF Document Conversion"><strong>PJ Classic &amp; PJ Professional - PDF Document Conversion</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://www.etymon.com/">
|
||||
http://www.etymon.com/
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
N/A
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Miscellaneous"><strong>Miscellaneous</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Arabic Analyzer for Java"><strong>Arabic Analyzer for Java</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://savannah.nongnu.org/projects/aramorph">
|
||||
http://savannah.nongnu.org/projects/aramorph
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Pierrick Brihaye
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Phonetix"><strong>Phonetix</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://www.companywebstore.de/tangentum/mirror/en/products/phonetix/index.html">
|
||||
http://www.companywebstore.de/tangentum/mirror/en/products/phonetix/index.html
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
tangentum technologies
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="ejIndex - JBoss MBean for Lucene"><strong>ejIndex - JBoss MBean for Lucene</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="http://ejindex.sourceforge.net/">
|
||||
http://ejindex.sourceforge.net/
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Andy Scholz
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="JavaCC"><strong>JavaCC</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
URL
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
<a href="https://javacc.dev.java.net/">
|
||||
https://javacc.dev.java.net/
|
||||
</a>
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th bgcolor="#039acc" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
author
|
||||
|
||||
</font>
|
||||
</th>
|
||||
<td bgcolor="#a0ddf0" colspan="" rowspan="" valign="top" align="left">
|
||||
<font color="#000000" size="-1" face="arial,helvetica,sanserif">
|
||||
|
||||
Sun Microsystems (java.net)
|
||||
|
||||
</font>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
259
docs/demo.html
|
@ -1,259 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Andrew C. Oliver">
|
||||
<meta name="email" content="acoliver@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Apache Lucene - Building and Installing the Basic Demo</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About this Document"><strong>About this Document</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This document is intended as a "getting started" guide to using and running the Lucene demos.
|
||||
It walks you through some basic installation and configuration.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About the Demos"><strong>About the Demos</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The Lucene command-line demo code consists of two applications that demonstrate various
|
||||
functionalities of Lucene and how one should go about adding Lucene to their applications.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Setting your CLASSPATH"><strong>Setting your CLASSPATH</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
First, you should <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">download</a> the
|
||||
latest Lucene distribution and then extract it to a working directory. Alternatively, you can <a href="http://wiki.apache.org/jakarta-lucene/SourceRepository">check out the sources from
|
||||
Subversion</a>, and then run <code>ant war-demo</code> to generate the JARs and WARs.
|
||||
</p>
|
||||
<p>
|
||||
You should see the Lucene JAR file in the directory you created when you extracted the archive. It
|
||||
should be named something like <code>lucene-core-{version}.jar</code>. You should also see a file
|
||||
called <code>lucene-demos-{version}.jar</code>. If you checked out the sources from Subversion then
|
||||
the JARs are located under the <code>build</code> subdirectory (after running <code>ant</code>
|
||||
successfully). Put both of these files in your Java CLASSPATH.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Indexing Files"><strong>Indexing Files</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Once you've gotten this far you're probably itching to go. Let's <b>build an index!</b> Assuming
|
||||
you've set your CLASSPATH correctly, just type:
|
||||
|
||||
<pre>
|
||||
java org.apache.lucene.demo.IndexFiles {full-path-to-lucene}/src
|
||||
</pre>
|
||||
|
||||
This will produce a subdirectory called <code>index</code> which will contain an index of all of the
|
||||
Lucene source code.
|
||||
</p>
|
||||
<p>
|
||||
To <b>search the index</b> type:
|
||||
|
||||
<pre>
|
||||
java org.apache.lucene.demo.SearchFiles
|
||||
</pre>
|
||||
|
||||
You'll be prompted for a query. Type in a swear word and press the enter key. You'll see that the
|
||||
Lucene developers are very well mannered, and you will get no results. Now try entering the word "vector".
|
||||
That should return a whole bunch of documents. The results will page at every tenth result and ask
|
||||
you whether you want more results.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About the code..."><strong>About the code...</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
<a href="demo2.html">read on&gt;&gt;&gt;</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
287
docs/demo2.html
|
@ -1,287 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Andrew C. Oliver">
|
||||
<meta name="email" content="acoliver@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Apache Lucene - Basic Demo Sources Walk-through</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About the Code"><strong>About the Code</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
In this section we walk through the sources behind the command-line Lucene demo: where to find them,
|
||||
their parts and their function. This section is intended for Java developers wishing to understand
|
||||
how to use Lucene in their applications.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Location of the source"><strong>Location of the source</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Relative to the directory created when you extracted Lucene or retrieved it from Subversion, you
|
||||
should see a directory called <code>src</code> which in turn contains a directory called
|
||||
<code>demo</code>. This is the root for all of the Lucene demos. Under this directory is
|
||||
<code>org/apache/lucene/demo</code>. This is where all the Java sources for the demos live.
|
||||
</p>
|
||||
<p>
|
||||
Within this directory you should see the <code>IndexFiles.java</code> class we executed earlier.
|
||||
Bring it up in <code>vi</code> or your editor of choice and let's take a look at it.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="IndexFiles"><strong>IndexFiles</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
As we discussed in the previous walk-through, the <code><a href="api/org/apache/lucene/demo/IndexFiles.html">IndexFiles</a></code> class creates a Lucene
|
||||
Index. Let's take a look at how it does this.
|
||||
</p>
|
||||
<p>
|
||||
The first substantial thing the <code>main</code> function does is instantiate <code><a href="api/org/apache/lucene/index/IndexWriter.html">IndexWriter</a></code>. It passes the string
|
||||
"<code>index</code>" and a new instance of a class called <code><a href="api/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a></code>.
|
||||
The "<code>index</code>" string is the name of the filesystem directory where all index information
|
||||
should be stored. Because we're not passing a full path, this will be created as a subdirectory of
|
||||
the current working directory (if it does not already exist). On some platforms, it may be created
|
||||
in other directories (such as the user's home directory).
|
||||
</p>
|
||||
<p>
|
||||
The <code><a href="api/org/apache/lucene/index/IndexWriter.html">IndexWriter</a></code> is the main
|
||||
class responsible for creating indices. To use it you must instantiate it with a path that it can
|
||||
write the index into. If this path does not exist it will first create it. Otherwise it will
|
||||
refresh the index at that path. You can also create an index using one of the subclasses of <code><a href="api/org/apache/lucene/store/Directory.html">Directory</a></code>. In any case, you must also pass an
|
||||
instance of <code><a href="api/org/apache/lucene/analysis/Analyzer.html">org.apache.lucene.analysis.Analyzer</a></code>.
|
||||
</p>
|
||||
<p>
|
||||
The particular <code><a href="api/org/apache/lucene/analysis/Analyzer.html">Analyzer</a></code> we
|
||||
are using, <code><a href="api/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a></code>, is
|
||||
little more than a standard Java Tokenizer, converting all strings to lowercase and filtering out
|
||||
useless words and characters from the index. By useless words and characters I mean common language
|
||||
words such as articles (a, an, the, etc.) and other strings that would be useless for searching
|
||||
(e.g. <b>'s</b>). It should be noted that there are different rules for every language, and you
|
||||
should use the proper analyzer for each. Lucene currently provides Analyzers for a number of
|
||||
different languages (see the <code>*Analyzer.java</code> sources under <a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/">contrib/analyzers/src/java/org/apache/lucene/analysis</a>).
|
||||
</p>
|
||||
<p>
|
||||
Looking further down in the file, you should see the <code>indexDocs()</code> code. This recursive
|
||||
function simply crawls the directories and uses <code><a href="api/org/apache/lucene/demo/FileDocument.html">FileDocument</a></code> to create <code><a href="api/org/apache/lucene/document/Document.html">Document</a></code> objects. The <code><a href="api/org/apache/lucene/document/Document.html">Document</a></code> is simply a data object to
|
||||
represent the content in the file as well as its creation time and location. These instances are
|
||||
added to the <code>indexWriter</code>. Take a look inside <code><a href="api/org/apache/lucene/demo/FileDocument.html">FileDocument</a></code>. It's not particularly
|
||||
complicated. It just adds fields to the <code><a href="api/org/apache/lucene/document/Document.html">Document</a></code>.
|
||||
</p>
|
||||
<p>
|
||||
As you can see there isn't much to creating an index. The devil is in the details. You may also
|
||||
wish to examine the other samples in this directory, particularly the <code><a href="api/org/apache/lucene/demo/IndexHTML.html">IndexHTML</a></code> class. It is a bit more
|
||||
complex but builds upon this example.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Searching Files"><strong>Searching Files</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The <code><a href="api/org/apache/lucene/demo/SearchFiles.html">SearchFiles</a></code> class is
|
||||
quite simple. It primarily collaborates with an <code><a href="api/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a></code>, <code><a href="api/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a></code>
|
||||
(which is used in the <code><a href="api/org/apache/lucene/demo/IndexFiles.html">IndexFiles</a></code> class as well) and a
|
||||
<code><a href="api/org/apache/lucene/queryParser/QueryParser.html">QueryParser</a></code>. The
|
||||
query parser is constructed with an analyzer used to interpret your query text in the same way the
|
||||
documents are interpreted: finding the end of words and removing useless words like 'a', 'an' and
|
||||
'the'. The <code><a href="api/org/apache/lucene/search/Query.html">Query</a></code> object contains
|
||||
the results from the <code><a href="api/org/apache/lucene/queryParser/QueryParser.html">QueryParser</a></code> which is passed to
|
||||
the searcher. Note that it's also possible to programmatically construct a rich <code><a href="api/org/apache/lucene/search/Query.html">Query</a></code> object without using the query
|
||||
parser. The query parser just enables decoding the <a href="queryparsersyntax.html">Lucene query
|
||||
syntax</a> into the corresponding <code><a href="api/org/apache/lucene/search/Query.html">Query</a></code> object. The searcher results are
|
||||
returned in a collection of Documents called <code><a href="api/org/apache/lucene/search/Hits.html">Hits</a></code> which is then iterated through and
|
||||
displayed to the user.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="The Web example..."><strong>The Web example...</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
<a href="demo3.html">read on>>></a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
292
docs/demo3.html
|
@ -1,292 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Andrew C. Oliver">
|
||||
<meta name="email" content="acoliver@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Apache Lucene - Building and Installing the Basic Demo</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About this Document"><strong>About this Document</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This document is intended as a "getting started" guide to installing and running the Lucene
|
||||
web application demo. This guide assumes that you have read the information in the previous two
|
||||
examples. We'll use Tomcat as our reference web container. These demos should work with nearly any
|
||||
container, but you may have to adapt them appropriately.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About the Demos"><strong>About the Demos</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The Lucene Web Application demo is a template web application intended for deployment on Tomcat or a
|
||||
similar web container. It's NOT designed as a "best practices" implementation by ANY means. It's
|
||||
more of a "hello world" type Lucene Web App. The purpose of this application is to demonstrate
|
||||
Lucene. With that being said, it should be relatively simple to create a small searchable website
|
||||
in Tomcat or a similar application server.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Indexing Files"><strong>Indexing Files</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p> Once you've gotten this far you're probably itching to go. Let's start by creating the index
|
||||
you'll need for the web examples. Since you've already set your CLASSPATH in the previous examples,
|
||||
all you need to do is type:
|
||||
|
||||
<pre>
|
||||
java org.apache.lucene.demo.IndexHTML -create -index {index-dir} ..
|
||||
</pre>
|
||||
|
||||
You'll need to do this from any subdirectory of your <code>{tomcat}/webapps</code> directory
|
||||
(make sure you didn't leave off the <code>..</code> or you'll get a null pointer exception).
|
||||
<code>{index-dir}</code> should be a directory that Tomcat has permission to read and write, but is
|
||||
outside of a web accessible context. By default the webapp is configured to look in
|
||||
<code>/opt/lucene/index</code> for this index.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Deploying the Demos"><strong>Deploying the Demos</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Located in your distribution directory you should see a war file called
|
||||
<code>luceneweb.war</code>. If you're working with a Subversion checkout, this will be under the
|
||||
<code>build</code> subdirectory. Copy this to your <code>{tomcat-home}/webapps</code> directory.
|
||||
You may need to restart Tomcat. </p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Configuration"><strong>Configuration</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p> From your Tomcat directory look in the <code>webapps/luceneweb</code> subdirectory. If it's not
|
||||
present, try browsing to <code>http://localhost:8080/luceneweb</code> (which causes Tomcat to deploy
|
||||
the webapp), then look again. Edit a file called <code>configuration.jsp</code>. Ensure that the
|
||||
<code>indexLocation</code> is equal to the location you used for your index. You may also customize
|
||||
the <code>appTitle</code> and <code>appFooter</code> strings as you see fit. Once you have finished
|
||||
altering the configuration you may need to restart Tomcat. You may also wish to update the war file
|
||||
by typing <code>jar -uf luceneweb.war configuration.jsp</code> from the <code>luceneweb</code>
|
||||
subdirectory. (The -u option is not available in all versions of jar. If yours lacks it, recreate the
|
||||
war file).
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Running the Demos"><strong>Running the Demos</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Now you're ready to roll. In your browser set the url to
|
||||
<code>http://localhost:8080/luceneweb</code>, enter <code>test</code> and the number of items per
|
||||
page and press search.</p>
|
||||
<p>You should now be looking either at a number of results (provided you didn't erase the Tomcat
|
||||
examples) or nothing. If you get an error regarding opening the index, then you probably set the
|
||||
path in <code>configuration.jsp</code> incorrectly or Tomcat doesn't have permission to access the index
|
||||
(or you skipped the step of creating it). Try other search terms. Depending on the number of items
|
||||
per page you set and results returned, there may be a link at the bottom that says <b>More
|
||||
Results>></b>; clicking it takes you to subsequent pages. </p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About the code..."><strong>About the code...</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
If you want to know more about how this web app works or how to customize it then <a href="demo4.html">read on>>></a>.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
361
docs/demo4.html
|
@ -1,361 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" value="Andrew C. Oliver">
|
||||
<meta name="email" value="acoliver@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Basic Demo Sources Walkthrough</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About the Code"><strong>About the Code</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
In this section we walk through the sources behind the basic Lucene Web Application demo: where to
|
||||
find them, their parts and their function. This section is intended for Java developers wishing to
|
||||
understand how to use Lucene in their applications or for those involved in deploying web
|
||||
applications based on Lucene.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Location of the source (developers/deployers)"><strong>Location of the source (developers/deployers)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Relative to the directory created when you extracted Lucene or retrieved it from Subversion, you
|
||||
should see a directory called <code>src</code> which in turn contains a directory called
|
||||
<code>jsp</code>. This is the root for all of the Lucene web demo.
|
||||
</p>
|
||||
<p>
|
||||
Within this directory you should see <code>index.jsp</code>. Bring this up in vi or your editor of
|
||||
choice.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="index.jsp (developers/deployers)"><strong>index.jsp (developers/deployers)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This jsp page is pretty boring by itself. All it does is include a header, display a form and
|
||||
include a footer. If you look at the form, it has two fields: <code>query</code> (where you enter
|
||||
your search criteria) and <code>maxresults</code> (where you specify the number of results per page).
|
||||
By the structure of this JSP it should be easy to customize it without even editing this particular
|
||||
file. You could simply change the header and footer. Let's look at the <code>header.jsp</code>
|
||||
(located in the same directory) next.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="header.jsp (developers/deployers)"><strong>header.jsp (developers/deployers)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The header is also very simple by itself. The only thing it does is include the
|
||||
<code>configuration.jsp</code> (which you looked at in the last section of this guide) and set the
|
||||
title and a brief header. This would be a good place to put your own custom HTML to "pretty" things
|
||||
up a bit. We won't cover the footer because all it does is display the footer and close your tags.
|
||||
Let's look at the <code>results.jsp</code>, the meat of this application, next.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="results.jsp (developers)"><strong>results.jsp (developers)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Most of the functionality lies in <code>results.jsp</code>. Much of it is for paging the search
|
||||
results, which we'll not cover here as it's commented well enough. The first thing in this page is
|
||||
the actual imports for the Lucene classes and Lucene demo classes. These classes are loaded from
|
||||
the jars included in the <code>WEB-INF/lib</code> directory in the <code>luceneweb.war</code> file.
|
||||
</p>
|
||||
<p>
|
||||
You'll notice that this file includes the same header and footer as <code>index.jsp</code>. From
|
||||
there it constructs an <code><a href="api/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a></code> with the
|
||||
<code>indexLocation</code> that was specified in <code>configuration.jsp</code>. If there is an
|
||||
error of any kind in opening the index, it is displayed to the user and the boolean flag
|
||||
<code>error</code> is set to tell the rest of the sections of the jsp not to continue.
|
||||
</p>
|
||||
<p>
|
||||
From there, this jsp attempts to get the search criteria, the start index (used for paging) and the
|
||||
maximum number of results per page. If the maximum results per page is not set or not valid then it
|
||||
and the start index are set to default values. If only the start index is invalid it is set to a
|
||||
default value. If the criteria aren't provided then a servlet error is thrown (it is assumed that
|
||||
this is the result of url tampering or some form of browser malfunction).
|
||||
</p>
|
||||
<p>
|
||||
The jsp moves on to construct a <code><a href="api/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a></code> to
|
||||
analyze the search text. This matches the analyzer used during indexing (<code><a href="api/org/apache/lucene/demo/IndexHTML.html">IndexHTML</a></code>), which is generally
|
||||
recommended. This is passed to the <code><a href="api/org/apache/lucene/queryParser/QueryParser.html">QueryParser</a></code> along with the
|
||||
criteria to construct a <code><a href="api/org/apache/lucene/search/Query.html">Query</a></code>
|
||||
object. You'll also notice the string literal <code>"contents"</code> included. This specifies
|
||||
that the search should cover the <code>contents</code> field and not the <code>title</code>,
|
||||
<code>url</code> or some other field in the indexed documents. If there is any error in
|
||||
constructing a <code><a href="api/org/apache/lucene/search/Query.html">Query</a></code> object an
|
||||
error is displayed to the user.
|
||||
</p>
|
||||
<p>
|
||||
In the next section of the jsp the <code><a href="api/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a></code> is asked to search
|
||||
given the query object. The results are returned in a collection called <code>hits</code>. If the
|
||||
length property of the <code>hits</code> collection is 0 (meaning there were no results) then an
|
||||
error is displayed to the user and the error flag is set.
|
||||
</p>
|
||||
<p>
|
||||
Finally the jsp iterates through the <code>hits</code> collection, taking the current page into
|
||||
account, and displays properties of the <code><a href="api/org/apache/lucene/document/Document.html">Document</a></code> objects we talked about in
|
||||
the first walkthrough. These objects contain "known" fields specific to their indexer (in this case
|
||||
<code><a href="api/org/apache/lucene/demo/IndexHTML.html">IndexHTML</a></code> constructs a document
|
||||
with "url", "title" and "contents").
|
||||
</p>
|
||||
<p>
|
||||
Please note that in a real deployment of Lucene, it's best to instantiate <code><a href="api/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a></code> and <code><a href="api/org/apache/lucene/queryParser/QueryParser.html">QueryParser</a></code> once, and then
|
||||
share them across search requests, instead of re-instantiating per search request.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="More sources (developers)"><strong>More sources (developers)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
There are additional sources used by the web app that were not specifically covered by either
|
||||
walkthrough. For example the HTML parser, the <code><a href="api/org/apache/lucene/demo/IndexHTML.html">IndexHTML</a></code> class and <code><a href="api/org/apache/lucene/demo/HTMLDocument.html">HTMLDocument</a></code> class. These are very
|
||||
similar to the classes covered in the first example, with properties specific to parsing and
|
||||
indexing HTML. This is beyond our scope; however, by now you should feel like you're "getting
|
||||
started" with Lucene.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Where to go from here? (everyone!)"><strong>Where to go from here? (everyone!)</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
There are a number of things this demo doesn't do or doesn't do quite right. For instance, you may
|
||||
have noticed that documents in the root context are unreachable (unless you reconfigure Tomcat to
|
||||
support that context or redirect to it), and anywhere the directory doesn't quite match the
|
||||
context mapping, you'll have a broken link in your results. If you want to index non-local files or
|
||||
have some other needs, this isn't supported; plus, there may be security issues with running the
|
||||
indexing application from your webapps directory. There are a number of things left for you the
|
||||
developer to do.
|
||||
</p>
|
||||
<p>
|
||||
In time some of these things may be added to Lucene as features (if you've got a good idea we'd love
|
||||
to hear it!), but for now: this is where you begin and the search engine/indexer ends. Lastly, one
|
||||
would assume you'd want to follow the above advice and customize the application to look a little
|
||||
more fancy than black on white with "Lucene Template" at the top. We'll see you on the Lucene
|
||||
Users' or Developers' <a href="mailinglists.html">mailing lists</a>!
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="When to contact the Author"><strong>When to contact the Author</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Please resist the urge to contact the authors of this document (without bribes of fame and fortune
|
||||
attached). First contact the <a href="mailinglists.html">mailing lists</a>, taking care to <a href="http://www.catb.org/~esr/faqs/smart-questions.html">Ask Questions The Smart Way</a>.
|
||||
Certainly you'll get the most help that way as well. That being said, feedback and modifications
|
||||
to this document and samples are ever so greatly appreciated. They are just best sent to the lists
|
||||
or <a href="http://wiki.apache.org/jakarta-lucene/HowToContribute">posted as patches</a>, so that
|
||||
everyone can share in them. Thanks for understanding!
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Features</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Features"><strong>Features</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene offers powerful features through a simple API:</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Scalable, High-Performance Indexing"><strong>Scalable, High-Performance Indexing</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<li>over 20MB/minute on Pentium M 1.5GHz</li>
|
||||
<li>small RAM requirements -- only 1MB heap</li>
|
||||
<li>incremental indexing as fast as batch indexing</li>
|
||||
<li>index size roughly 20-30% the size of text indexed</li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Powerful, Accurate and Efficient Search Algorithms"><strong>Powerful, Accurate and Efficient Search Algorithms</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<li>ranked searching -- best results returned first</li>
|
||||
<li>many powerful query types: phrase queries, wildcard queries, proximity
|
||||
queries, range queries and more</li>
|
||||
<li>fielded searching (e.g., title, author, contents)</li>
|
||||
<li>date-range searching</li>
|
||||
<li>sorting by any field</li>
|
||||
<li>multiple-index searching with merged results</li>
|
||||
<li>allows simultaneous update and searching</li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Cross-Platform Solution"><strong>Cross-Platform Solution</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<li>Available as Open Source software under the
|
||||
<a href="http://www.apache.org/licenses/LICENSE-2.0.html">Apache License</a>
|
||||
which lets you use Lucene in both commercial and Open Source programs</li>
|
||||
<li>100%-pure Java</li>
|
||||
<li>implementations <a href="http://wiki.apache.org/jakarta-lucene/LuceneImplementations">in other
|
||||
programming languages available</a> that are index-compatible</li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" value="Andrew C. Oliver">
|
||||
<meta name="email" value="acoliver@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Getting Started Guide</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Getting Started"><strong>Getting Started</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This document is intended as a "getting started" guide. It has three audiences: first-time users
|
||||
looking to install Apache Lucene in their application or web server; developers looking to modify or base
|
||||
the applications they develop on Lucene; and developers looking to become involved in and contribute
|
||||
to the development of Lucene. This document is written in tutorial and walk-through format. The
|
||||
goal is to help you "get started". It does not go into great depth on some of the conceptual or
|
||||
inner details of Lucene.
|
||||
</p>
|
||||
<p>
|
||||
Each section listed below builds on the previous one. More advanced users
|
||||
may wish to skip sections.
|
||||
</p>
|
||||
<ul>
|
||||
<li><a href="demo.html">About the command-line Lucene demo and its usage</a>. This section
|
||||
is intended for anyone who wants to use the command-line Lucene demo.</li> <p />
|
||||
|
||||
<li><a href="demo2.html">About the sources and implementation for the command-line Lucene
|
||||
demo</a>. This section walks through the implementation details (sources) of the
|
||||
command-line Lucene demo. This section is intended for developers.</li> <p />
|
||||
|
||||
<li><a href="demo3.html">About installing and configuring the demo template web
|
||||
application</a>. While this walk-through assumes Tomcat as your container of choice,
|
||||
there is no reason you can't (provided you have the requisite knowledge) adapt the
|
||||
instructions to your container. This section is intended for those responsible for the
|
||||
development or deployment of Lucene-based web applications.</li> <p />
|
||||
|
||||
<li><a href="demo4.html">About the sources used to construct the demo template web
|
||||
application</a>. Please note the template application is designed to highlight features of
|
||||
Lucene and is <b>not</b> an example of best practices. (One would hopefully use MVC
|
||||
architecture such as provided by Jakarta Struts and taglibs, but showing you how to do that
|
||||
would be WAY beyond the scope of this guide.) This section is intended for developers and
|
||||
those wishing to customize the demo template web application to their needs. </li>
|
||||
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
255
docs/index.html
|
@ -1,255 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" value="Jon S. Stevens">
|
||||
<meta name="email" value="jon at latchkey.com">
|
||||
<meta name="author" value="Ted Husted">
|
||||
<meta name="email" value="husted at apache.org">
|
||||
<meta name="author" value="Doug Cutting">
|
||||
<meta name="email" value="cutting at apache.org">
|
||||
<meta name="author" value="Peter Carlson">
|
||||
<meta name="email" value="carlson at apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Overview - Apache Lucene</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Apache Lucene"><strong>Apache Lucene</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Apache Lucene is a high-performance, full-featured text search engine
|
||||
library written entirely in Java. It is a technology suitable for nearly any
|
||||
application that requires full-text search, especially cross-platform.
|
||||
</p>
|
||||
<p>
|
||||
Apache Lucene is an open source project available for
|
||||
<a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">free download</a>.
|
||||
Please use the links on the left to access Lucene.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Lucene News"><strong>Lucene News</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<h3>26 May 2006 - Release 2.0.0 available </h3>
|
||||
<p>This is mostly a bugfix release from release 1.9.1.
|
||||
Note however that deprecated 1.x features have now
|
||||
been removed. Any code that compiles against Lucene
|
||||
1.9.1 without deprecation warnings should work without
|
||||
further changes with any 2.x release. For more
|
||||
information about this release, please read <a href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_2_0_0/CHANGES.txt">CHANGES.txt</a>.</p>
|
||||
<p>Binary and source distributions are
|
||||
available <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>.</p>
|
||||
<h3>2 March 2006 - Release 1.9.1 available </h3>
|
||||
<p>This fixes a serious bug in release 1.9-final. See <a href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_9_1/CHANGES.txt">CHANGES.txt</a>
|
||||
for details.</p>
|
||||
<p>Binary and source distributions are
|
||||
available <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>.</p>
|
||||
<h3>27 February 2006 - 1.9 final available </h3>
|
||||
<p>This release has many improvements since release
|
||||
1.4.3, including new features, performance
|
||||
improvements, bug fixes, etc. See <a href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_9_final/CHANGES.txt">CHANGES.txt</a>
|
||||
for details.</p>
|
||||
<p>1.9 will be the last 1.x release. It is both
|
||||
back-compatible with 1.4.3 and forward-compatible with
|
||||
the upcoming 2.0 release. Many methods and classes in
|
||||
1.4.3 have been deprecated in 1.9 and will be removed
|
||||
in 2.0. Applications must compile against 1.9 without
|
||||
deprecation warnings before they are compatible with
|
||||
2.0.</p>
|
||||
<p>Binary and source distributions are
|
||||
available <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>.</p>
|
||||
<h3>26 January 2006 - Nightly builds available</h3>
|
||||
<p>Nightly builds of the current development version of Lucene, to be released as Lucene 1.9,
|
||||
are now available at <a href="http://cvs.apache.org/dist/lucene/java/nightly/">http://cvs.apache.org/dist/lucene/java/nightly/</a>.
|
||||
</p>
|
||||
<h3>28 October 2005 - Lucene at ApacheCon</h3>
|
||||
<p><a href="http://www.apachecon.com"><img src="http://apachecon.com/2005/US/logos/Conference135x59.jpg" /></a></p>
|
||||
<p>Monday, December 12, 2005 at 3pm by Grant Ingersoll:<br />
|
||||
Abstract:<br />
|
||||
Lucene is a high performance, scalable, cross-platform search engine that contains many advanced features that often go untapped by the majority of users. In this session, designed for those familiar with Lucene, we will examine some of Lucene's more advanced topics and their application, including:</p>
|
||||
<ol>
|
||||
<li>Term Vectors: Manual and Pseudo relevance feedback; Advanced document collection analysis for
|
||||
domain specialization</li>
|
||||
<li>Span Queries: Better phrase matching; Candidate Identification for Question Answering</li>
|
||||
<li>Tying it all Together: Building a search framework for experimentation and rapid deployment</li>
|
||||
<li>Case Studies from <a href="http://www.cnlp.org">CNLP</a>: Crosslingual/multilingual retrieval in Arabic, English and Dutch;
|
||||
Sublanguage specialization for commercial trouble ticket analysis; Passage retrieval and
|
||||
analysis for Question Answering application</li>
|
||||
</ol>
|
||||
<p>Topics 1 through 3 will provide technical details on implementing the advanced Lucene features, while the fourth topic will provide a broader context for understanding when and where to use these features.
|
||||
</p>
|
||||
<h3>14 February 2005 - Lucene moves to Apache top-level</h3>
|
||||
<p>Lucene has migrated from Apache's Jakarta project to the top-level. Along with this migration,
|
||||
the source code repository has been converted to Subversion. The migration is in progress with
|
||||
some loose ends. Please stay tuned!
|
||||
</p>
|
||||
<h3>December 2004 - <em>Lucene in Action</em> is published</h3>
|
||||
<a href="http://www.lucenebook.com/"><img border="0" align="left" src="images/lia_3d.jpg" /></a>
|
||||
<p>The first book dedicated solely to Lucene is published. The
|
||||
"search inside the book" feature implemented with Lucene can
|
||||
be seen at <a href="http://www.lucenebook.com/">lucenebook.com</a>.
|
||||
</p>
|
||||
<p style="clear: both;" />
|
||||
<h3>29 November 2004 - Lucene 1.4.3 Released</h3>
|
||||
<p>This fixes a few bugs in 1.4.2. See <a href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_4_3/CHANGES.txt">CHANGES.txt</a>
|
||||
for details. Binary and source distributions are
|
||||
available <a href="http://www.apache.org/dyn/closer.cgi/lucene/">here</a>. After choosing your mirror, navigate to the archive section via the java link.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,367 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" value="Otis Gospodnetic">
|
||||
<meta name="email" value='$au.getAttributeValue("email")'>
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Lucene Sandbox</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="../images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="../index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="../features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="../whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="../mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="../gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="../queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="../fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="../scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="../api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="../contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="../benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="../lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Lucene Sandbox"><strong>Lucene Sandbox</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The Lucene project also contains a workspace, Lucene Sandbox, that is open to all Lucene committers, as well
|
||||
as a few other developers. The purpose of the Sandbox is to host various third party contributions,
|
||||
and to serve as a place to try out new ideas and prepare them for inclusion into the core Lucene
|
||||
distribution.<br />
|
||||
Users are free to experiment with the components developed in the Sandbox, but Sandbox components will
|
||||
not necessarily be maintained, particularly in their current state.
|
||||
</p>
|
||||
<p>
|
||||
You can access the Lucene Sandbox repository at
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/">http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/</a>.
|
||||
</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Snowball Stemmers for Lucene"><strong>Snowball Stemmers for Lucene</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This project provides pre-compiled versions of the Snowball stemmers
|
||||
for Lucene.
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/snowball">The
|
||||
repository for the Snowball contribution.</a>
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://snowball.tartarus.org/">Background information on Snowball</a>,
|
||||
which is a language for stemmers developed by Martin Porter.
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Analyzers, Tokenizers, Filters"><strong>Analyzers, Tokenizers, Filters</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Contributed Analyzers, Tokenizers, and Filters for various languages.
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/analyzers/">The
|
||||
repository for the Analyzers contribution.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Ant"><strong>Ant</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The Ant project is a useful Ant task that creates a Lucene index out of an Ant fileset. It also
|
||||
contains an example HTML parser that uses JTidy.
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/ant/">The
|
||||
repository for the Ant contribution.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="WordNet/Synonyms"><strong>WordNet/Synonyms</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The Lucene WordNet code consists of a single class which parses a prolog file
|
||||
from the WordNet site that contains a list of English words and synonyms.
|
||||
The class builds a Lucene index from the synonyms file. Your querying code could
|
||||
hit this index to build up a set of synonyms for the terms in the
|
||||
search query.
|
||||
</p>
|
||||
<p>
|
||||
More information on the <a href="http://www.tropo.com/techno/java/lucene/wordnet.html">Lucene WordNet package</a>.
|
||||
<a href="http://wordnet.princeton.edu/">WordNet</a> is an online database of English language words that contains
|
||||
synonyms, definitions, and various relationships between synonym sets.
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/wordnet/">The
|
||||
repository for the WordNet module.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Lucli - Lucene Command-line Interface"><strong>Lucli - Lucene Command-line Interface</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The Lucli application allows index manipulation from the
|
||||
command-line.
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/lucli/">The
|
||||
repository for the Lucli contribution.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Term Highlighter"><strong>Term Highlighter</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
A small set of classes for highlighting matching terms in
|
||||
search results.
|
||||
</p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/highlighter/">The
|
||||
repository for the Highlighter contribution.</a>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Javascript Query Constructor"><strong>Javascript Query Constructor</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Javascript library to support client-side query-building. Provides support for a user interface similar to
|
||||
<a href="http://www.google.com.sg/advanced_search">Google's Advanced Search</a>.
|
||||
</p>
|
||||
<p>
|
||||
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/javascript/queryConstructor/">The
|
||||
repository for the Javascript Query Constructor files.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Javascript Query Validator"><strong>Javascript Query Validator</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Javascript library to support client-side query validation. Lucene doesn't like malformed queries and tends to
|
||||
throw ParseExceptions, which are often difficult to interpret and pass on to the user. This library hopes to
|
||||
alleviate that problem.
|
||||
</p>
|
||||
<p>
|
||||
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/javascript/queryValidator/">The
|
||||
repository for the Javascript Query Validator files.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="High Frequency Terms"><strong>High Frequency Terms</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
The miscellaneous package is for classes that don't fit anywhere else. The only class in it right now determines
|
||||
what terms occur the most inside a Lucene index. This could be useful for analyzing which terms may need to go
|
||||
into a custom stop word list for better search results.
|
||||
</p>
|
||||
<p>
|
||||
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/miscellaneous/">The
|
||||
repository for miscellaneous classes.</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,285 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Apache Lucene - Mailing Lists</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Java User List"><strong>Java User List</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This list is for users of Java Lucene to ask questions, share knowledge,
|
||||
and discuss issues.
|
||||
</p>
|
||||
<ul>
|
||||
<li><a href="mailto:java-user-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:java-user-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-java-user/">Archive</a>
|
||||
(<a href="http://mail-archives.apache.org/mod_mbox/jakarta-lucene-user/">old archive</a>)</li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/java-user/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Java Developer List"><strong>Java Developer List</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This is the list where participating developers of the Java Lucene project meet
|
||||
and discuss issues, code changes/additions, etc. Do not send mail to this list
|
||||
with usage questions or configuration questions and problems.
|
||||
</p>
|
||||
<p>
|
||||
Discussion list:
|
||||
<ul>
|
||||
<li><a href="mailto:java-dev-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:java-dev-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-java-dev/">Archive</a>
|
||||
(<a href="http://mail-archives.apache.org/mod_mbox/jakarta-lucene-dev/">old archive</a>)</li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/java-dev/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
Commit notifications:
|
||||
<ul>
|
||||
<li><a href="mailto:java-commits-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:java-commits-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-java-commits/">Archive</a></li>
|
||||
</ul>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Lucene4c Developer List"><strong>Lucene4c Developer List</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This is the list where participating developers of the lucene4c
|
||||
project meet and discuss issues related to development of
|
||||
lucene4c. Do not send mail to this list with usage or
|
||||
configuration questions and problems.
|
||||
</p>
|
||||
<p>
|
||||
Discussion list:
|
||||
<ul>
|
||||
<li><a href="mailto:c-dev-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:c-dev-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-c-dev/">Archive</a></li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/c-dev/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
Commit notifications:
|
||||
<ul>
|
||||
<li><a href="mailto:c-commits-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:c-commits-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-c-commits/">Archive</a></li>
|
||||
</ul>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Ruby Developer List"><strong>Ruby Developer List</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Discussion list for developers of Ruby/SWIG Lucene.
|
||||
</p>
|
||||
<ul>
|
||||
<li><a href="mailto:ruby-dev-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:ruby-dev-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-ruby-dev/">Archive</a></li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/ruby-dev/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="General Lucene List"><strong>General Lucene List</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
General discussion concerning all Lucene subprojects.
|
||||
</p>
|
||||
<ul>
|
||||
<li><a href="mailto:general-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
<li><a href="mailto:general-unsubscribe@lucene.apache.org">Unsubscribe</a></li>
|
||||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-general/">Archive</a></li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/general/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,933 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Peter Carlson">
|
||||
<meta name="email" content="carlson@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene -
|
||||
Query Parser Syntax - Apache Lucene
|
||||
</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Overview"><strong>Overview</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Although Lucene provides the ability to create your own
|
||||
queries through its API, it also provides a rich query
|
||||
language through the Query Parser, a lexer which
|
||||
interprets a string into a Lucene Query using JavaCC.
|
||||
</p>
|
||||
<p>This page provides the Query Parser syntax in Lucene 1.9.
|
||||
If you are using a different
|
||||
version of Lucene, please consult the copy of
|
||||
<code>docs/queryparsersyntax.html</code> that was distributed
|
||||
with the version you are using.
|
||||
</p>
|
||||
<p>
|
||||
Before choosing to use the provided Query Parser, please consider the following:
|
||||
<ol>
|
||||
<li>If you are programmatically generating a query string and then
|
||||
parsing it with the query parser then you should seriously consider building
|
||||
your queries directly with the query API. In other words, the query
|
||||
parser is designed for human-entered text, not for program-generated
|
||||
text.</li>
|
||||
|
||||
<li>Untokenized fields are best added directly to queries, and not
|
||||
through the query parser. If a field's values are generated programmatically
|
||||
by the application, then so should query clauses for this field.
|
||||
An analyzer, which the query parser uses, is designed to convert human-entered
|
||||
text to terms. Program-generated values, like dates, keywords, etc.,
|
||||
should be consistently program-generated.</li>
|
||||
|
||||
<li>In a query form, fields which are general text should use the query
|
||||
parser. All others, such as date ranges, keywords, etc. are better added
|
||||
directly through the query API. A field with a limited set of values,
|
||||
that can be specified with a pull-down menu should not be added to a
|
||||
query string which is subsequently parsed, but rather added as a
|
||||
TermQuery clause.</li>
|
||||
</ol>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Terms"><strong>Terms</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>A query is broken up into terms and operators. There are two types of terms: Single Terms and Phrases.</p>
|
||||
<p>A Single Term is a single word such as "test" or "hello".</p>
|
||||
<p>A Phrase is a group of words surrounded by double quotes such as "hello dolly".</p>
|
||||
<p>Multiple terms can be combined together with Boolean operators to form a more complex query (see below).</p>
|
||||
<p>Note: The analyzer used to create the index will be used on the terms and phrases in the query string.
|
||||
So it is important to choose an analyzer that will not interfere with the terms used in the query string.</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Fields"><strong>Fields</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports fielded data. When performing a search you can either specify a field, or use the default field. The field names and default field are implementation specific.</p>
|
||||
<p>You can search any field by typing the field name followed by a colon ":" and then the term you are looking for. </p>
|
||||
<p>As an example, let's assume a Lucene index contains two fields, title and text and text is the default field.
|
||||
If you want to find the document entitled "The Right Way" which contains the text "don't go this way", you can enter: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>title:"The Right Way" AND text:go</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>or</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>title:"The Right Way" AND go</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>Since text is the default field, the field indicator is not required.</p>
|
||||
<p>Note: The field is only valid for the term that it directly precedes, so the query</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>title:Do it right</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>Will only find "Do" in the title field. It will find "it" and "right" in the default field (in this case the text field). </p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Term Modifiers"><strong>Term Modifiers</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports modifying query terms to provide a wide range of searching options.</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Wildcard Searches"><strong>Wildcard Searches</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports single and multiple character wildcard searches.</p>
|
||||
<p>To perform a single character wildcard search use the "?" symbol.</p>
|
||||
<p>To perform a multiple character wildcard search use the "*" symbol.</p>
|
||||
<p>The single character wildcard search looks for terms that match the search term with the "?" replaced by any single character. For example, to search for "text" or "test" you can use the search:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>te?t</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>Multiple character wildcard searches look for 0 or more characters. For example, to search for test, tests or tester, you can use the search: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>test*</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>You can also use the wildcard searches in the middle of a term.</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>te*t</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>Note: You cannot use a * or ? symbol as the first character of a search.</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Fuzzy Searches"><strong>Fuzzy Searches</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports fuzzy searches based on the Levenshtein Distance, or Edit Distance algorithm. To do a fuzzy search use the tilde, "~", symbol at the end of a Single word Term. For example to search for a term similar in spelling to "roam" use the fuzzy search: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>roam~</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>This search will find terms like foam and roams.</p>
|
||||
<p>Starting with Lucene 1.9 an additional (optional) parameter can specify the required similarity. The value is between 0 and 1; with a value closer to 1, only terms with a higher similarity will be matched. For example:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>roam~0.8</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>The default that is used if the parameter is not given is 0.5.</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Proximity Searches"><strong>Proximity Searches</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports finding words that are within a specific distance of each other. To do a proximity search use the tilde, "~", symbol at the end of a Phrase. For example, to search for "apache" and "jakarta" within 10 words of each other in a document use the search: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache"~10</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Range Searches"><strong>Range Searches</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Range Queries allow one to match documents whose field(s) values
|
||||
are between the lower and upper bound specified by the Range Query.
|
||||
Range Queries can be inclusive or exclusive of the upper and lower bounds.
|
||||
Sorting is done lexicographically.</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>mod_date:[20020101 TO 20030101]</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>This will find documents whose mod_date fields have values between 20020101 and 20030101, inclusive.
|
||||
Note that Range Queries are not reserved for date fields. You could also use range queries with non-date fields:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>title:{Aida TO Carmen}</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>This will find all documents whose titles are between Aida and Carmen, but not including Aida and Carmen.</p>
|
||||
<p>Inclusive range queries are denoted by square brackets. Exclusive range queries are denoted by
|
||||
curly brackets.</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Boosting a Term"><strong>Boosting a Term</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene provides the relevance level of matching documents based on the terms found. To boost a term use the caret, "^", symbol with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be.</p>
|
||||
<p>Boosting allows you to control the relevance of a document by boosting its term. For example, if you are searching for</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>jakarta apache</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>and you want the term "jakarta" to be more relevant, boost it using the ^ symbol along with the boost factor next to the term.
|
||||
You would type:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>jakarta^4 apache</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>This will make documents with the term jakarta appear more relevant. You can also boost Phrase Terms as in the example: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache"^4 "Apache Lucene"</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>By default, the boost factor is 1. Although the boost factor must be positive, it can be less than 1 (e.g. 0.2)</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Boolean operators"><strong>Boolean operators</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Boolean operators allow terms to be combined through logic operators.
|
||||
Lucene supports AND, "+", OR, NOT and "-" as Boolean operators (Note: Boolean operators must be ALL CAPS).</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="OR"><strong>OR</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>The OR operator is the default conjunction operator. This means that if there is no Boolean operator between two terms, the OR operator is used.
|
||||
The OR operator links two terms and finds a matching document if either of the terms exists in a document. This is equivalent to a union using sets.
|
||||
The symbol || can be used in place of the word OR.</p>
|
||||
<p>To search for documents that contain either "jakarta apache" or just "jakarta" use the query:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache" jakarta</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>or</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache" OR jakarta</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="AND"><strong>AND</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>The AND operator matches documents where both terms exist anywhere in the text of a single document.
|
||||
This is equivalent to an intersection using sets. The symbol &amp;&amp; can be used in place of the word AND.</p>
|
||||
<p>To search for documents that contain "jakarta apache" and "Apache Lucene" use the query: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache" AND "Apache Lucene"</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="+"><strong>+</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>The "+" or required operator requires that the term after the "+" symbol exist somewhere in a field of a single document.</p>
|
||||
<p>To search for documents that must contain "jakarta" and may contain "lucene" use the query:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>+jakarta lucene</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="NOT"><strong>NOT</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>The NOT operator excludes documents that contain the term after NOT.
|
||||
This is equivalent to a difference using sets. The symbol ! can be used in place of the word NOT.</p>
|
||||
<p>To search for documents that contain "jakarta apache" but not "Apache Lucene" use the query: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache" NOT "Apache Lucene"</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>Note: The NOT operator cannot be used with just one term. For example, the following search will return no results:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>NOT "jakarta apache"</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="-"><strong>-</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>The "-" or prohibit operator excludes documents that contain the term after the "-" symbol.</p>
|
||||
<p>To search for documents that contain "jakarta apache" but not "Apache Lucene" use the query: </p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>"jakarta apache" -"Apache Lucene"</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Grouping"><strong>Grouping</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports using parentheses to group clauses to form sub queries. This can be very useful if you want to control the boolean logic for a query.</p>
|
||||
<p>To search for either "jakarta" or "apache" and "website" use the query:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>(jakarta OR apache) AND website</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<p>This eliminates any confusion and makes sure that website must exist and either term jakarta or apache may exist.</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Field Grouping"><strong>Field Grouping</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports using parentheses to group multiple clauses to a single field.</p>
|
||||
<p>To search for a title that contains both the word "return" and the phrase "pink panther" use the query:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>title:(+return +"pink panther")</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Escaping Special Characters"><strong>Escaping Special Characters</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene supports escaping special characters that are part of the query syntax. The current list of special characters is</p>
|
||||
<p>+ - &amp;&amp; || ! ( ) { } [ ] ^ " ~ * ? : \</p>
|
||||
<p>To escape these characters use the \ before the character. For example, to search for (1+1):2 use the query:</p>
|
||||
<div align="left">
|
||||
<table cellspacing="4" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#ffffff"><pre>\(1\+1\)\:2</pre></td>
|
||||
<td bgcolor="#023264" width="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
<td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,162 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Doug Cutting">
|
||||
<meta name="email" content="cutting@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Resources - Apache Lucene</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Page moved"><strong>Page moved</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<a href="http://wiki.apache.org/jakarta-lucene/Resources">This page is now part of the Wiki</a>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,552 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" content="Grant Ingersoll">
|
||||
<meta name="email" content="gsingers at apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Scoring - Apache Lucene</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Introduction"><strong>Introduction</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene scoring is the heart of why we all love Lucene. It is blazingly fast and it hides almost all of the complexity from the user.
|
||||
In a nutshell, it works. At least, that is, until it doesn't work, or doesn't work as one would expect it to
|
||||
work. Then we are left digging into Lucene internals or asking for help on java-user@lucene.apache.org to figure out why a document with five of our query terms
|
||||
scores lower than a different document with only one of the query terms. </p>
|
||||
<p>While this document won't answer your specific scoring issues, it will, hopefully, point you to the places that can
|
||||
help you figure out the what and why of Lucene scoring.</p>
|
||||
<p>Lucene scoring uses a combination of the
|
||||
<a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model (VSM) of Information
|
||||
Retrieval</a> and the <a href="http://en.wikipedia.org/wiki/Standard_Boolean_model">Boolean model</a>
|
||||
to determine
|
||||
how relevant a given Document is to a User's query. In general, the idea behind the VSM is the more
|
||||
times a query term appears in a document relative to
|
||||
the number of times the term appears in all the documents in the collection, the more relevant that
|
||||
document is to the query. It uses the Boolean model to first narrow down the documents that need to
|
||||
be scored based on the use of boolean logic in the Query specification. Lucene also adds some
|
||||
capabilities and refinements onto this model to support boolean and fuzzy searching, but it
|
||||
essentially remains a VSM based system at the heart.
|
||||
For some valuable references on VSM and IR in general refer to the
|
||||
<a href="http://wiki.apache.org/jakarta-lucene/InformationRetrieval">Lucene Wiki IR references</a>.
|
||||
</p>
|
||||
<p>The rest of this document will cover <a href="#Scoring">Scoring</a> basics and how to change your
|
||||
<a href="api/org/apache/lucene/search/Similarity.html">Similarity</a>. Next it will cover ways you can
|
||||
customize the Lucene internals in <a href="#Changing your Scoring -- Expert Level">Changing your Scoring
|
||||
-- Expert Level</a> which gives details on implementing your own
|
||||
<a href="api/org/apache/lucene/search/Query.html">Query</a> class and related functionality. Finally, we
|
||||
will finish up with some reference material in the <a href="#Appendix">Appendix</a>.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Scoring"><strong>Scoring</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Scoring is very much dependent on the way documents are indexed,
|
||||
so it is important to understand indexing (see
|
||||
<a href="gettingstarted.html">Apache Lucene - Getting Started Guide</a>
|
||||
and the Lucene
|
||||
<a href="fileformats.html">file formats</a>
|
||||
before continuing on with this section.) It is also assumed that readers know how to use the
|
||||
<a href="api/org/apache/lucene/search/Searcher.html#explain(Query query, int doc)">Searcher.explain(Query query, int doc)</a> functionality,
|
||||
which can go a long way in informing why a score is returned.
|
||||
</p>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Fields and Documents"><strong>Fields and Documents</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>In Lucene, the objects we are scoring are
|
||||
<a href="api/org/apache/lucene/document/Document.html">Documents</a>. A Document is a collection
|
||||
of
|
||||
<a href="api/org/apache/lucene/document/Field.html">Fields</a>. Each Field has semantics about how
|
||||
it is created and stored (i.e. tokenized, untokenized, raw data, compressed, etc.) It is important to
|
||||
note that Lucene scoring works on Fields and then combines the results to return Documents. This is
|
||||
important because two Documents with the exact same content, but one having the content in two Fields
|
||||
and the other in one Field will return different scores for the same query due to length normalization
|
||||
(assuming the
|
||||
<a href="api/org/apache/lucene/search/DefaultSimilarity.html">DefaultSimilarity</a>
|
||||
on the Fields).
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Score Boosting"><strong>Score Boosting</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene allows influencing search results by "boosting" at more than one level:
|
||||
<ul>
|
||||
<li><b>Document level boosting</b>
|
||||
- while indexing - by calling
|
||||
<a href="api/org/apache/lucene/document/Document.html#setBoost(float)">document.setBoost()</a>
|
||||
before a document is added to the index.
|
||||
</li>
|
||||
<li><b>Document's Field level boosting</b>
|
||||
- while indexing - by calling
|
||||
<a href="api/org/apache/lucene/document/Fieldable.html#setBoost(float)">field.setBoost()</a>
|
||||
before adding a field to the document (and before adding the document to the index).
|
||||
</li>
|
||||
<li><b>Query level boosting</b>
|
||||
- during search, by setting a boost on a query clause, calling
|
||||
<a href="api/org/apache/lucene/search/Query.html#setBoost(float)">Query.setBoost()</a>.
|
||||
</li>
|
||||
</ul>
|
||||
</p>
|
||||
<p>Indexing time boosts are preprocessed for storage efficiency and written to
|
||||
the directory (when writing the document) in a single byte (!) as follows:
|
||||
For each field of a document, all boosts of that field
|
||||
(i.e. all boosts under the same field name in that doc) are multiplied.
|
||||
The result is multiplied by the boost of the document,
|
||||
and also multiplied by a "field length norm" value
|
||||
that represents the length of that field in that doc
|
||||
(so shorter fields are automatically boosted up).
|
||||
The result is encoded as a single byte
|
||||
(with some precision loss of course) and stored in the directory.
|
||||
The similarity object in effect at indexing computes the length-norm of the field.
|
||||
</p>
|
||||
<p>This composition of 1-byte representation of norms
|
||||
(that is, indexing time multiplication of field boosts &amp; doc boost &amp; field-length-norm)
|
||||
is nicely described in
|
||||
<a href="api/org/apache/lucene/document/Fieldable.html#setBoost(float)">Fieldable.setBoost()</a>.
|
||||
</p>
|
||||
<p>Encoding and decoding of the resulting float norm in a single byte are done by the
|
||||
static methods of the class Similarity:
|
||||
<a href="api/org/apache/lucene/search/Similarity.html#encodeNorm(float)">encodeNorm()</a> and
|
||||
<a href="api/org/apache/lucene/search/Similarity.html#decodeNorm(byte)">decodeNorm()</a>.
|
||||
Due to loss of precision, it is not guaranteed that decode(encode(x)) = x,
|
||||
e.g. decode(encode(0.89)) = 0.75.
|
||||
At scoring (search) time, this norm is brought into the score of document
|
||||
as <b>norm(t, d)</b>, as shown by the formula in
|
||||
<a href="api/org/apache/lucene/search/Similarity.html">Similarity</a>.
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Understanding the Scoring Formula"><strong>Understanding the Scoring Formula</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
This scoring formula is described in the
|
||||
<a href="api/org/apache/lucene/search/Similarity.html">Similarity</a> class. Please take the time to study this formula, as it contains much of the information about how the
|
||||
basics of Lucene scoring work, especially the
|
||||
<a href="api/org/apache/lucene/search/TermQuery.html">TermQuery</a>.
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="The Big Picture"><strong>The Big Picture</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>OK, so the tf-idf formula and the
|
||||
<a href="api/org/apache/lucene/search/Similarity.html">Similarity</a>
|
||||
are great for understanding the basics of Lucene scoring, but what really drives Lucene scoring are
|
||||
the use and interactions between the
|
||||
<a href="api/org/apache/lucene/search/Query.html">Query</a> classes, as created by each application in
|
||||
response to a user's information need.
|
||||
</p>
|
||||
<p>In this regard, Lucene offers a wide variety of <a href="api/org/apache/lucene/search/Query.html">Query</a> implementations, most of which are in the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html">org.apache.lucene.search</a> package.
|
||||
These implementations can be combined in a wide variety of ways to provide complex querying
|
||||
capabilities along with
|
||||
information about where matches took place in the document collection. The <a href="#Query Classes">Query</a>
|
||||
section below
|
||||
highlights some of the more important Query classes. For information on the other ones, see the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html">package summary</a>. For details on implementing
|
||||
your own Query class, see <a href="#Changing your Scoring -- Expert Level">Changing your Scoring --
|
||||
Expert Level</a> below.
|
||||
</p>
|
||||
<p>Once a Query has been created and submitted to the
|
||||
<a href="api/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a>, the scoring process
|
||||
begins. (See the <a href="#Appendix">Appendix</a> Algorithm section for more notes on the process.) After some infrastructure setup,
|
||||
control finally passes to the <a href="api/org/apache/lucene/search/Weight.html">Weight</a> implementation and its
|
||||
<a href="api/org/apache/lucene/search/Scorer.html">Scorer</a> instance. In the case of any type of
|
||||
<a href="api/org/apache/lucene/search/BooleanQuery.html">BooleanQuery</a>, scoring is handled by the
|
||||
<a href="http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java?view=log">BooleanWeight2</a> (link goes to ViewVC BooleanQuery java code which contains the BooleanWeight2 inner class),
|
||||
unless the static
|
||||
<a href="api/org/apache/lucene/search/BooleanQuery.html#setUseScorer14(boolean)">
|
||||
BooleanQuery#setUseScorer14(boolean)</a> method is set to true,
|
||||
in which case the
|
||||
<a href="http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java?view=log">BooleanWeight</a>
|
||||
(link goes to ViewVC BooleanQuery java code, which contains the BooleanWeight inner class) from the 1.4 version of Lucene is used instead.
|
||||
See <a href="http://svn.apache.org/repos/asf/lucene/java/trunk/CHANGES.txt">CHANGES.txt</a> under release 1.9 RC1 for more information on choosing which Scorer to use.
|
||||
</p>
|
||||
<p>
|
||||
Assuming the use of the BooleanWeight2, a
|
||||
BooleanScorer2 is created by bringing together
|
||||
all of the
|
||||
<a href="api/org/apache/lucene/search/Scorer.html">Scorer</a>s from the sub-clauses of the BooleanQuery.
|
||||
When the BooleanScorer2 is asked to score it delegates its work to an internal Scorer based on the type
|
||||
of clauses in the Query. This internal Scorer essentially loops over the sub scorers and sums the scores
|
||||
provided by each scorer while factoring in the coord() score.
|
||||
<!-- Do we want to fill in the details of the counting sum scorer, disjunction scorer, etc.? -->
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Query Classes"><strong>Query Classes</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>For information on the Query Classes, refer to the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html#query">search package javadocs</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Changing Similarity"><strong>Changing Similarity</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>One of the ways of changing the scoring characteristics of Lucene is to change the similarity factors. For information on
|
||||
how to do this, see the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html#changingSimilarity">search package javadocs</a></p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Changing your Scoring -- Expert Level"><strong>Changing your Scoring -- Expert Level</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>At a much deeper level, one can affect scoring by implementing their own Query classes (and related scoring classes.) To learn more
|
||||
about how to do this, refer to the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html#scoring">search package javadocs</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Appendix"><strong>Appendix</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Class Diagrams"><strong>Class Diagrams</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
<a href="http://wiki.apache.org/jakarta-lucene/KarlWettin?action=AttachFile&amp;do=view&amp;target=search_uml_1.jpg">
|
||||
Karl Wettin's UML on the Wiki</a>
|
||||
</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Sequence Diagrams"><strong>Sequence Diagrams</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p class="highlight-for-editing">FILL IN HERE. Volunteers?</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#828DA6">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Algorithm"><strong>Algorithm</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>GSI Note: This section is mostly my notes on stepping through the Scoring process and serves as
|
||||
fertilizer for the earlier sections.</p>
|
||||
<p>In the typical search application, a
|
||||
<a href="api/org/apache/lucene/search/Query.html">Query</a>
|
||||
is passed to the
|
||||
<a href="api/org/apache/lucene/search/Searcher.html">Searcher</a>
|
||||
, beginning the scoring process.
|
||||
</p>
|
||||
<p>Once inside the Searcher, a
|
||||
<a href="api/org/apache/lucene/search/Hits.html">Hits</a>
|
||||
object is constructed, which handles the scoring and caching of the search results.
|
||||
The Hits constructor stores references to three or four important objects:
|
||||
<ol>
|
||||
<li>The
|
||||
<a href="api/org/apache/lucene/search/Weight.html">Weight</a>
|
||||
object of the Query. The Weight object is an internal representation of the Query that
|
||||
allows the Query to be reused by the Searcher.
|
||||
</li>
|
||||
<li>The Searcher that initiated the call.</li>
|
||||
<li>A
|
||||
<a href="api/org/apache/lucene/search/Filter.html">Filter</a>
|
||||
for limiting the result set. Note, the Filter may be null.
|
||||
</li>
|
||||
<li>A
|
||||
<a href="api/org/apache/lucene/search/Sort.html">Sort</a>
|
||||
object for specifying how to sort the results if the standard score based sort method is not
|
||||
desired.
|
||||
</li>
|
||||
</ol>
|
||||
</p>
|
||||
<p>Now that the Hits object has been initialized, it begins the process of identifying documents that
|
||||
match the query by calling getMoreDocs method. Assuming we are not sorting (since sorting doesn't
|
||||
affect the raw Lucene score),
|
||||
we call on the "expert" search method of the Searcher, passing in our
|
||||
<a href="api/org/apache/lucene/search/Weight.html">Weight</a>
|
||||
object,
|
||||
<a href="api/org/apache/lucene/search/Filter.html">Filter</a>
|
||||
and the number of results we want. This method
|
||||
returns a
|
||||
<a href="api/org/apache/lucene/search/TopDocs.html">TopDocs</a>
|
||||
object, which is an internal collection of search results.
|
||||
The Searcher creates a
|
||||
<a href="api/org/apache/lucene/search/TopDocCollector.html">TopDocCollector</a>
|
||||
and passes it along with the Weight and Filter to another expert search method (for more on the
|
||||
<a href="api/org/apache/lucene/search/HitCollector.html">HitCollector</a>
|
||||
mechanism, see
|
||||
<a href="api/org/apache/lucene/search/Searcher.html">Searcher</a>
|
||||
.) The TopDocCollector uses a
|
||||
<a href="api/org/apache/lucene/util/PriorityQueue.html">PriorityQueue</a>
|
||||
to collect the top results for the search.
|
||||
</p>
|
||||
<p>If a Filter is being used, some initial setup is done to determine which docs to include. Otherwise,
|
||||
we ask the Weight for
|
||||
a
|
||||
<a href="api/org/apache/lucene/search/Scorer.html">Scorer</a>
|
||||
for the
|
||||
<a href="api/org/apache/lucene/index/IndexReader.html">IndexReader</a>
|
||||
of the current searcher and we proceed by
|
||||
calling the score method on the
|
||||
<a href="api/org/apache/lucene/search/Scorer.html">Scorer</a>
|
||||
.
|
||||
</p>
|
||||
<p>At last, we are actually going to score some documents. The score method takes in the HitCollector
|
||||
(most likely the TopDocCollector) and does its business.
|
||||
Of course, here is where things get involved. The
|
||||
<a href="api/org/apache/lucene/search/Scorer.html">Scorer</a>
|
||||
that is returned by the
|
||||
<a href="api/org/apache/lucene/search/Weight.html">Weight</a>
|
||||
object depends on what type of Query was submitted. In most real world applications with multiple
|
||||
query terms,
|
||||
the
|
||||
<a href="api/org/apache/lucene/search/Scorer.html">Scorer</a>
|
||||
is going to be a
|
||||
<a href="http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/BooleanScorer2.java?view=log">BooleanScorer2</a>
|
||||
(see the section on customizing your scoring for info on changing this.)
|
||||
|
||||
</p>
|
||||
<p>Assuming a BooleanScorer2 scorer, we first initialize the Coordinator, which is used to apply the
|
||||
coord() factor. We then
|
||||
get an internal Scorer based on the required, optional and prohibited parts of the query.
|
||||
Using this internal Scorer, the BooleanScorer2 then proceeds
|
||||
into a while loop based on the Scorer#next() method. The next() method advances to the next document
|
||||
matching the query. This is an
|
||||
abstract method in the Scorer class and is thus overridden by all derived
|
||||
implementations. <!-- DOUBLE CHECK THIS -->If you have a simple OR query
|
||||
your internal Scorer is most likely a DisjunctionSumScorer, which essentially combines the scorers
|
||||
from the sub scorers of the OR'd terms.</p>
|
||||
</blockquote>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
Place for sharing style information across the XDocs
|
||||
|
||||
*/
|
||||
|
||||
|
||||
.big{
|
||||
font-size: 1.5em;
|
||||
}
|
||||
|
||||
.formula{
|
||||
font-size: 0.9em;
|
||||
display: block;
|
||||
position: relative;
|
||||
left: -25px;
|
||||
}
|
||||
|
||||
#summation{
|
||||
|
||||
}
|
||||
|
||||
.summation-range{
|
||||
position: relative;
|
||||
top: 5px;
|
||||
font-size: 0.85em;
|
||||
}
|
||||
|
||||
/*
|
||||
Useful for highlighting pieces of documentation that others should pay special attention to
|
||||
when proof reading
|
||||
*/
|
||||
.highlight-for-editing{
|
||||
background-color: yellow;
|
||||
}
|
|
@ -1,302 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" value="Otis Gospodneti&#263;">
|
||||
<meta name="email" value="otis @ apache dot org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Apache Lucene - System Properties</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="About this Document"><strong>About this Document</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
Lucene has a number of properties that can be tuned. They can be adjusted either
|
||||
programmatically, using the Lucene API, or their default values can be set via
|
||||
system properties described in this document. Starting
|
||||
with Lucene 1.9, the system properties (except org.apache.lucene.lockDir) are not supported
|
||||
anymore and the API (i.e. the get/set methods) should be used directly.
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="System Properties"><strong>System Properties</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>
|
||||
<table width="100%" border="0" cellpadding="4" cellspacing="0">
|
||||
<tr valign="top">
|
||||
<td width="25%"><b>Lucene Property</b></td>
|
||||
<td width="25%"><b>System Property</b></td>
|
||||
<td width="25%"><b>Default Value</b></td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/IndexWriter.html#mergeFactor">mergeFactor</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.mergeFactor
|
||||
</td>
|
||||
<td width="25%">
|
||||
10
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/IndexWriter.html#minMergeDocs">minMergeDocs</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.minMergeDocs
|
||||
</td>
|
||||
<td width="25%">
|
||||
10
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/IndexWriter.html#maxMergeDocs">maxMergeDocs</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.maxMergeDocs
|
||||
</td>
|
||||
<td width="25%">
|
||||
Integer.MAX_VALUE
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/IndexWriter.html#maxFieldLength">maxFieldLength</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.maxFieldLength
|
||||
</td>
|
||||
<td width="25%">
|
||||
10000
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/IndexWriter.html#COMMIT_LOCK_TIMEOUT">COMMIT_LOCK_TIMEOUT</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.commitLockTimeout
|
||||
</td>
|
||||
<td width="25%">
|
||||
10000 ms
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/IndexWriter.html#WRITE_LOCK_TIMEOUT">WRITE_LOCK_TIMEOUT</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.writeLockTimeout
|
||||
</td>
|
||||
<td width="25%">
|
||||
1000 ms
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/search/BooleanQuery.html#maxClauseCount">maxClauseCount</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.maxClauseCount
|
||||
</td>
|
||||
<td width="25%">
|
||||
1024
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/store/FSDirectory.html#lockDir">lockDir</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.lockDir
|
||||
</td>
|
||||
<td width="25%">
|
||||
the value of <code>java.io.tmpdir</code> system property
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/store/FSDirectory.html#FSDirectory.class">FSDirectory.class</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.FSDirectory.class
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.store.FSDirectory
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="TOP">
|
||||
<td width="25%">
|
||||
<a href="api/org/apache/lucene/index/SegmentReader.html#SegmentReader.class">SegmentReader.class</a>
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.index.SegmentReader.class
|
||||
</td>
|
||||
<td width="25%">
|
||||
org.apache.lucene.index.SegmentReader
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,227 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
|
||||
<!-- start the processing -->
|
||||
<!-- ====================================================================== -->
|
||||
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
|
||||
<!-- Main Page Section -->
|
||||
<!-- ====================================================================== -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
|
||||
|
||||
<meta name="author" value="Ted Husted">
|
||||
<meta name="email" value="husted@apache.org">
|
||||
<meta name="author" value="Doug Cutting">
|
||||
<meta name="email" value="cutting@apache.org">
|
||||
|
||||
|
||||
|
||||
|
||||
<title>Apache Lucene - Who We Are - Apache Lucene</title>
|
||||
<link rel="stylesheet" type="text/css" href="styles/lucene.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff" text="#000000" link="#525D76">
|
||||
<table border="0" width="100%" cellspacing="0">
|
||||
<!-- TOP IMAGE -->
|
||||
<tr>
|
||||
<td align="left">
|
||||
<a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" width="387" height="100" border="0"/></a>
|
||||
</td>
|
||||
<td align="right">
|
||||
<a href="http://lucene.apache.org/"><img src="./images/lucene_green_300.gif" alt="Apache Lucene" border="0"/></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0" width="100%" cellspacing="4">
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
|
||||
<tr>
|
||||
<!-- LEFT SIDE NAVIGATION -->
|
||||
<td width="20%" valign="top" nowrap="true">
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
<p><strong>About</strong></p>
|
||||
<ul>
|
||||
<li> <a href="./index.html">Overview</a>
|
||||
</li>
|
||||
<li> <a href="./features.html">Features</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/PoweredBy">Powered by Lucene</a>
|
||||
</li>
|
||||
<li> <a href="./whoweare.html">Who We Are</a>
|
||||
</li>
|
||||
<li> <a href="./mailinglists.html">Mailing Lists</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Resources</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene">Wiki</a>
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ">FAQ</a>
|
||||
</li>
|
||||
<li> <a href="./gettingstarted.html">Getting Started</a>
|
||||
</li>
|
||||
<li> <a href="./queryparsersyntax.html">Query Syntax</a>
|
||||
</li>
|
||||
<li> <a href="./fileformats.html">File Formats</a>
|
||||
</li>
|
||||
<li> <a href="./scoring.html">Scoring</a>
|
||||
</li>
|
||||
<li> <a href="./api/index.html">Javadoc</a>
|
||||
</li>
|
||||
<li> <a href="./contributions.html">Contributions</a>
|
||||
</li>
|
||||
<li> <a href="./benchmarks.html">Benchmarks</a>
|
||||
</li>
|
||||
<li> <a href="http://issues.apache.org/jira/browse/LUCENE">Issue Tracker</a>
|
||||
</li>
|
||||
<li> <a href="./lucene-sandbox/">Lucene Sandbox</a>
|
||||
</li>
|
||||
</ul>
|
||||
<p><strong>Download</strong></p>
|
||||
<ul>
|
||||
<li> <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">Releases</a>
|
||||
</li>
|
||||
<li> <a href="http://svn.apache.org/viewcvs.cgi/lucene/java/">Source Repository</a>
|
||||
</li>
|
||||
</ul>
|
||||
</td>
|
||||
<td width="80%" align="left" valign="top">
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Who We Are"><strong>Who We Are</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<p>Lucene is maintained by a team of volunteer developers.</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Committers"><strong>Committers</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<li><b><a href="http://www.nutch.org/blog/cutting.html">Doug Cutting</a></b> (cutting@...)
|
||||
|
||||
<p>Lucene was originally written in Doug's spare time during late 1997
|
||||
and early 1998. Doug had previously written search engines at Xerox's
|
||||
Palo Alto Research Center (PARC), Apple, and Excite@Home, and authored
|
||||
several information retrieval <a href="http://lucene.sourceforge.net/publications.html">papers and
|
||||
patents</a>.</p>
|
||||
|
||||
</li>
|
||||
<li><b><a href="http://www.jroller.com/page/otis">Otis Gospodnetic</a></b> (otis@...)</li>
|
||||
<li><b>Brian Goetz</b> (briangoetz@...)</li>
|
||||
<li><b>Scott Ganyo</b> (scottganyo@...)</li>
|
||||
<li><b>Eugene Gluzberg</b> (drag0n@...)</li>
|
||||
<li><b>Matt Tucker</b> (mtucker@...)</li>
|
||||
<li><b>Cory Hubert</b> (clhubert@...)</li>
|
||||
<li><b>Dave Kor</b> (davekor@...)</li>
|
||||
<li><b>Jon Stevens</b> (jon at latchkey.com)</li>
|
||||
<li><b>Tal Dayan</b> (zapta@...)</li>
|
||||
<li><b>Andrew C. Oliver</b> (acoliver@...)</li>
|
||||
<li><b>Peter Carlson</b> (carlson@...)</li>
|
||||
<li><b>Erik Hatcher</b> (ehatcher@...)</li>
|
||||
<li><b>Dmitry Serebrennikov</b> (dmitrys@...)</li>
|
||||
<li><b>Christoph Goller</b> (goller@...)</li>
|
||||
<li><b>Tim Jones</b> (tjones@...)</li>
|
||||
<li><b>Daniel Naber</b> (dnaber@...)</li>
|
||||
<li><b>Bernhard Messer</b> (bmesser@...)</li>
|
||||
<li><b>Yonik Seeley</b> (yonik@...)</li>
|
||||
<li><b>Grant Ingersoll</b> (gsingers@...) </li>
|
||||
<li><b>Mike McCandless</b> (mikemccand@...) </li>
|
||||
<li><b>Mark Harwood</b> (mharwood@...) </li>
|
||||
</ul>
|
||||
<p>Note that the email addresses above end with @apache.org.</p>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
<table border="0" cellspacing="0" cellpadding="2" width="100%">
|
||||
<tr><td bgcolor="#525D76">
|
||||
<font color="#ffffff" face="arial,helvetica,sanserif">
|
||||
<a name="Other Contributors"><strong>Other Contributors</strong></a>
|
||||
</font>
|
||||
</td></tr>
|
||||
<tr><td>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<li>Josh Bloch</li>
|
||||
<li>Ted Husted</li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
</p>
|
||||
</td></tr>
|
||||
<tr><td><br/></td></tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<!-- FOOTER -->
|
||||
<tr><td colspan="2">
|
||||
<hr noshade="" size="1"/>
|
||||
</td></tr>
|
||||
<tr><td colspan="2">
|
||||
<div align="center"><font color="#525D76" size="-1"><em>
|
||||
Copyright © 1999-2005, The Apache Software Foundation
|
||||
</em></font></div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
<!-- end the processing -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
# Copyright 2002-2005 The Apache Software Foundation or its licensors,
|
||||
# as applicable.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
##############
|
||||
# Properties used by forrest.build.xml for building the website
|
||||
# These are the defaults, un-comment them only if you need to change them.
|
||||
##############
|
||||
|
||||
# Prints out a summary of Forrest settings for this project
|
||||
#forrest.echo=true
|
||||
|
||||
# Project name (used to name .war file)
|
||||
#project.name=my-project
|
||||
|
||||
# Specifies name of Forrest skin to use
|
||||
# See list at http://forrest.apache.org/docs/skins.html
|
||||
#project.skin=pelt
|
||||
|
||||
# Descriptors for plugins and skins
|
||||
# comma separated list, file:// is supported
|
||||
#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
|
||||
#forrest.plugins.descriptors=http://forrest.apache.org/plugins/plugins.xml,http://forrest.apache.org/plugins/whiteboard-plugins.xml
|
||||
|
||||
##############
|
||||
# behavioural properties
|
||||
#project.menu-scheme=tab_attributes
|
||||
#project.menu-scheme=directories
|
||||
|
||||
##############
|
||||
# layout properties
|
||||
|
||||
# Properties that can be set to override the default locations
|
||||
#
|
||||
# Parent properties must be set. This usually means uncommenting
|
||||
# project.content-dir if any other property using it is uncommented
|
||||
|
||||
#project.status=status.xml
|
||||
#project.content-dir=src/documentation
|
||||
#project.raw-content-dir=${project.content-dir}/content
|
||||
#project.conf-dir=${project.content-dir}/conf
|
||||
#project.sitemap-dir=${project.content-dir}
|
||||
#project.xdocs-dir=${project.content-dir}/content/xdocs
|
||||
#project.resources-dir=${project.content-dir}/resources
|
||||
#project.stylesheets-dir=${project.resources-dir}/stylesheets
|
||||
#project.images-dir=${project.resources-dir}/images
|
||||
#project.schema-dir=${project.resources-dir}/schema
|
||||
#project.skins-dir=${project.content-dir}/skins
|
||||
#project.skinconf=${project.content-dir}/skinconf.xml
|
||||
#project.lib-dir=${project.content-dir}/lib
|
||||
#project.classes-dir=${project.content-dir}/classes
|
||||
#project.translations-dir=${project.content-dir}/translations
|
||||
project.configfile=${project.home}/src/documentation/conf/cli.xconf
|
||||
|
||||
##############
|
||||
# validation properties
|
||||
|
||||
# This set of properties determine if validation is performed
|
||||
# Values are inherited unless overridden.
|
||||
# e.g. if forrest.validate=false then all others are false unless set to true.
|
||||
#forrest.validate=true
|
||||
#forrest.validate.xdocs=${forrest.validate}
|
||||
#forrest.validate.skinconf=${forrest.validate}
|
||||
#forrest.validate.sitemap=${forrest.validate}
|
||||
#forrest.validate.stylesheets=${forrest.validate}
|
||||
#forrest.validate.skins=${forrest.validate}
|
||||
#forrest.validate.skins.stylesheets=${forrest.validate.skins}
|
||||
|
||||
# *.failonerror=(true|false) - stop when an XML file is invalid
|
||||
#forrest.validate.failonerror=true
|
||||
|
||||
# *.excludes=(pattern) - comma-separated list of path patterns to not validate
|
||||
# e.g.
|
||||
#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
|
||||
#forrest.validate.xdocs.excludes=
|
||||
|
||||
|
||||
##############
|
||||
# General Forrest properties
|
||||
|
||||
# The URL to start crawling from
|
||||
#project.start-uri=linkmap.html
|
||||
|
||||
# Set logging level for messages printed to the console
|
||||
# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
|
||||
#project.debuglevel=ERROR
|
||||
|
||||
# Max memory to allocate to Java
|
||||
#forrest.maxmemory=64m
|
||||
|
||||
# Any other arguments to pass to the JVM. For example, to run on an X-less
|
||||
# server, set to -Djava.awt.headless=true
|
||||
#forrest.jvmargs=
|
||||
|
||||
# The bugtracking URL - the issue number will be appended
|
||||
#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
|
||||
#project.bugtracking-url=http://issues.apache.org/jira/browse/
|
||||
|
||||
# The issues list as rss
|
||||
#project.issues-rss-url=
|
||||
|
||||
#I18n Property. Based on the locale request for the browser.
|
||||
#If you want to use it for static site then modify the JVM system.language
|
||||
# and run once per language
|
||||
#project.i18n=true
|
||||
|
||||
# The names of plugins that are required to build the project
|
||||
# comma separated list (no spaces)
|
||||
# You can request a specific version by appending "-VERSION" to the end of
|
||||
# the plugin name. If you exclude a version number the latest released version
|
||||
# will be used, however, be aware that this may be a development version. In
|
||||
# a production environment it is recommended that you specify a known working
|
||||
# version.
|
||||
# Run "forrest available-plugins" for a list of plug-ins currently available
|
||||
project.required.plugins=org.apache.forrest.plugin.output.pdf
|
||||
|
||||
# Proxy configuration
|
||||
# proxy.host=
|
||||
# proxy.port=
|
|
@ -0,0 +1,57 @@
|
|||
# Copyright 2002-2005 The Apache Software Foundation or its licensors,
|
||||
# as applicable.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#=======================================================================
|
||||
# CatalogManager.properties for Catalog Entity Resolver.
|
||||
#
|
||||
# This is the default properties file for your project.
|
||||
# This facilitates local configuration of application-specific catalogs.
|
||||
# If you have defined any local catalogs, then they will be loaded
|
||||
# before Forrest's core catalogs.
|
||||
#
|
||||
# See the Apache Forrest documentation:
|
||||
# http://forrest.apache.org/docs/your-project.html
|
||||
# http://forrest.apache.org/docs/validation.html
|
||||
|
||||
# verbosity:
|
||||
# The level of messages for status/debug (messages go to standard output).
|
||||
# The setting here is for your own local catalogs.
|
||||
# The verbosity of Forrest's core catalogs is controlled via
|
||||
# main/webapp/WEB-INF/cocoon.xconf
|
||||
#
|
||||
# The following messages are provided ...
|
||||
# 0 = none
|
||||
# 1 = ? (... not sure yet)
|
||||
# 2 = 1+, Loading catalog, Resolved public, Resolved system
|
||||
# 3 = 2+, Catalog does not exist, resolvePublic, resolveSystem
|
||||
# 10 = 3+, List all catalog entries when loading a catalog
|
||||
# (Cocoon also logs the "Resolved public" messages.)
|
||||
verbosity=1
|
||||
|
||||
# catalogs ... list of additional catalogs to load
|
||||
# (Note that Apache Forrest will automatically load its own default catalog
|
||||
# from main/webapp/resources/schema/catalog.xcat)
|
||||
# Use either full pathnames or relative pathnames.
|
||||
# pathname separator is always semi-colon (;) regardless of operating system
|
||||
# directory separator is always slash (/) regardless of operating system
|
||||
catalogs=../resources/schema/catalog.xcat
|
||||
|
||||
# relative-catalogs
|
||||
# If false, relative catalog URIs are made absolute with respect to the
|
||||
# base URI of the CatalogManager.properties file. This setting only
|
||||
# applies to catalog URIs obtained from the catalogs property in the
|
||||
# CatalogManager.properties file
|
||||
# Example: relative-catalogs=[yes|no]
|
||||
relative-catalogs=no
|
|
@ -0,0 +1,321 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Copyright 2002-2004 The Apache Software Foundation or its licensors,
|
||||
as applicable.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!--+
|
||||
| This is the Apache Cocoon command line configuration file.
|
||||
| Here you give the command line interface details of where
|
||||
| to find various aspects of your Cocoon installation.
|
||||
|
|
||||
| If you wish, you can also use this file to specify the URIs
|
||||
| that you wish to generate.
|
||||
|
|
||||
| The current configuration information in this file is for
|
||||
| building the Cocoon documentation. Therefore, all links here
|
||||
| are relative to the build context dir, which, in the build.xml
|
||||
| file, is set to ${build.context}
|
||||
|
|
||||
| Options:
|
||||
| verbose: increase amount of information presented
|
||||
| to standard output (default: false)
|
||||
| follow-links: whether linked pages should also be
|
||||
| generated (default: true)
|
||||
| precompile-only: precompile sitemaps and XSP pages, but
|
||||
| do not generate any pages (default: false)
|
||||
| confirm-extensions: check the mime type for the generated page
|
||||
| and adjust filename and links extensions
|
||||
| to match the mime type
|
||||
| (e.g. text/html->.html)
|
||||
|
|
||||
| Note: Whilst using an xconf file to configure the Cocoon
|
||||
| Command Line gives access to more features, the use of
|
||||
| command line parameters is more stable, as there are
|
||||
| currently plans to improve the xconf format to allow
|
||||
| greater flexibility. If you require a stable and
|
||||
| consistent method for accessing the CLI, it is recommended
|
||||
| that you use the command line parameters to configure
|
||||
| the CLI. See documentation at:
|
||||
| http://cocoon.apache.org/2.1/userdocs/offline/
|
||||
| http://wiki.apache.org/cocoon/CommandLine
|
||||
|
|
||||
+-->
|
||||
|
||||
<cocoon verbose="true"
|
||||
follow-links="true"
|
||||
precompile-only="false"
|
||||
confirm-extensions="false">
|
||||
|
||||
<!--+
|
||||
| The context directory is usually the webapp directory
|
||||
| containing the sitemap.xmap file.
|
||||
|
|
||||
| The config file is the cocoon.xconf file.
|
||||
|
|
||||
| The work directory is used by Cocoon to store temporary
|
||||
| files and cache files.
|
||||
|
|
||||
| The destination directory is where generated pages will
|
||||
| be written (assuming the 'simple' mapper is used, see
|
||||
| below)
|
||||
+-->
|
||||
<context-dir>.</context-dir>
|
||||
<config-file>WEB-INF/cocoon.xconf</config-file>
|
||||
<work-dir>../tmp/cocoon-work</work-dir>
|
||||
<dest-dir>../site</dest-dir>
|
||||
|
||||
<!--+
|
||||
| A checksum file can be used to store checksums for pages
|
||||
| as they are generated. When the site is next generated,
|
||||
| files will not be written if their checksum has not changed.
|
||||
| This means that it will be easier to detect which files
|
||||
| need to be uploaded to a server, using the timestamp.
|
||||
+-->
|
||||
<!-- <checksums-uri>build/work/checksums</checksums-uri>-->
|
||||
|
||||
<!--+
|
||||
| Broken link reporting options:
|
||||
| Report into a text file, one link per line:
|
||||
| <broken-links type="text" report="filename"/>
|
||||
| Report into an XML file:
|
||||
| <broken-links type="xml" report="filename"/>
|
||||
| Ignore broken links (default):
|
||||
| <broken-links type="none"/>
|
||||
|
|
||||
| Two attributes to this node specify whether a page should
|
||||
| be generated when an error has occurred. 'generate' specifies
|
||||
| whether a page should be generated (default: true) and
|
||||
| extension specifies an extension that should be appended
|
||||
| to the generated page's filename (default: none)
|
||||
|
|
||||
| Using this, a quick scan through the destination directory
|
||||
| will show broken links, by their filename extension.
|
||||
+-->
|
||||
<broken-links type="xml"
|
||||
file="../brokenlinks.xml"
|
||||
generate="false"
|
||||
extension=".error"
|
||||
show-referrers="true"/>
|
||||
|
||||
<!--+
|
||||
| Load classes at startup. This is necessary for generating
|
||||
| from sites that use SQL databases and JDBC.
|
||||
| The <load-class> element can be repeated if multiple classes
|
||||
| are needed.
|
||||
+-->
|
||||
<!--
|
||||
<load-class>org.firebirdsql.jdbc.Driver</load-class>
|
||||
-->
|
||||
|
||||
<!--+
|
||||
| Configures logging.
|
||||
| The 'log-kit' parameter specifies the location of the log kit
|
||||
| configuration file (usually called logkit.xconf).
|
||||
|
|
||||
| Logger specifies the logging category (for all logging prior
|
||||
| to other Cocoon logging categories taking over)
|
||||
|
|
||||
| Available log levels are:
|
||||
| DEBUG: prints all level of log messages.
|
||||
| INFO: prints all level of log messages except DEBUG
|
||||
| ones.
|
||||
| WARN: prints all level of log messages except DEBUG
|
||||
| and INFO ones.
|
||||
| ERROR: prints all level of log messages except DEBUG,
|
||||
| INFO and WARN ones.
|
||||
| FATAL_ERROR: prints only log messages of this level
|
||||
+-->
|
||||
<!-- <logging log-kit="WEB-INF/logkit.xconf" logger="cli" level="ERROR" /> -->
|
||||
|
||||
<!--+
|
||||
| Specifies the filename to be appended to URIs that
|
||||
| refer to a directory (i.e. end with a forward slash).
|
||||
+-->
|
||||
<default-filename>index.html</default-filename>
|
||||
|
||||
<!--+
|
||||
| Specifies a user agent string to the sitemap when
|
||||
| generating the site.
|
||||
|
|
||||
| A generic term for a web browser is "user agent". Any
|
||||
| user agent, when connecting to a web server, will provide
|
||||
| a string to identify itself (e.g. as Internet Explorer or
|
||||
| Mozilla). It is possible to have Cocoon serve different
|
||||
| content depending upon the user agent string provided by
|
||||
| the browser. If your site does this, then you may want to
|
||||
| use this <user-agent> entry to provide a 'fake' user agent
|
||||
| to Cocoon, so that it generates the correct version of your
|
||||
| site.
|
||||
|
|
||||
| For most sites, this can be ignored.
|
||||
+-->
|
||||
<!--
|
||||
<user-agent>Cocoon Command Line Environment 2.1</user-agent>
|
||||
-->
|
||||
|
||||
<!--+
|
||||
| Specifies an accept string to the sitemap when generating
|
||||
| the site.
|
||||
| User agents can specify to an HTTP server what types of content
|
||||
| (by mime-type) they are able to receive. E.g. a browser may be
|
||||
| able to handle jpegs, but not pngs. The HTTP accept header
|
||||
| allows the server to take the browser's capabilities into account,
|
||||
| and only send back content that it can handle.
|
||||
|
|
||||
| For most sites, this can be ignored.
|
||||
+-->
|
||||
|
||||
<accept>*/*</accept>
|
||||
|
||||
<!--+
|
||||
| Specifies which URIs should be included or excluded, according
|
||||
| to wildcard patterns.
|
||||
|
|
||||
| These includes/excludes are only relevant when you are following
|
||||
| links. A link URI must match an include pattern (if one is given)
|
||||
| and not match an exclude pattern, if it is to be followed by
|
||||
| Cocoon. It can be useful, for example, where there are links in
|
||||
| your site to pages that are not generated by Cocoon, such as
|
||||
| references to api-documentation.
|
||||
|
|
||||
| By default, all URIs are included. If both include and exclude
|
||||
| patterns are specified, a URI is first checked against the
|
||||
| include patterns, and then against the exclude patterns.
|
||||
|
|
||||
| Multiple patterns can be given, using multiple include or exclude
|
||||
| nodes.
|
||||
|
|
||||
| The order of the elements is not significant, as only the first
|
||||
| successful match of each category is used.
|
||||
|
|
||||
| Currently, only the complete source URI can be matched (including
|
||||
| any URI prefix). Future plans include destination URI matching
|
||||
| and regexp matching. If you have requirements for these, contact
|
||||
| dev@cocoon.apache.org.
|
||||
+-->
|
||||
|
||||
<exclude pattern="**/"/>
|
||||
<exclude pattern="**apidocs**"/>
|
||||
<exclude pattern="api/**"/>
|
||||
<exclude pattern="**benchmarktemplate.xml"/>
|
||||
|
||||
<!--
|
||||
This is a workaround for FOR-284 "link rewriting broken when
|
||||
linking to xml source views which contain site: links".
|
||||
See the explanation there and in declare-broken-site-links.xsl
|
||||
-->
|
||||
<exclude pattern="site:**"/>
|
||||
<exclude pattern="ext:**"/>
|
||||
<exclude pattern="**/site:**"/>
|
||||
<exclude pattern="**/ext:**"/>
|
||||
|
||||
<!-- Exclude tokens used in URLs to ASF mirrors (interpreted by a CGI) -->
|
||||
<exclude pattern="[preferred]/**"/>
|
||||
<exclude pattern="[location]"/>
|
||||
|
||||
<!-- <include-links extension=".html"/>-->
|
||||
|
||||
<!--+
|
||||
| <uri> nodes specify the URIs that should be generated, and
|
||||
| where required, what should be done with the generated pages.
|
||||
| They describe the way the URI of the generated file is created
|
||||
| from the source page's URI. There are three ways that a generated
|
||||
| file URI can be created: append, replace and insert.
|
||||
|
|
||||
| The "type" attribute specifies one of (append|replace|insert):
|
||||
|
|
||||
| append:
|
||||
| Append the generated page's URI to the end of the source URI:
|
||||
|
|
||||
| <uri type="append" src-prefix="documents/" src="index.html"
|
||||
| dest="build/dest/"/>
|
||||
|
|
||||
| This means that
|
||||
| (1) the "documents/index.html" page is generated
|
||||
| (2) the file will be written to "build/dest/documents/index.html"
|
||||
|
|
||||
| replace:
|
||||
| Completely ignore the generated page's URI - just
|
||||
| use the destination URI:
|
||||
|
|
||||
| <uri type="replace" src-prefix="documents/" src="index.html"
|
||||
| dest="build/dest/docs.html"/>
|
||||
|
|
||||
| This means that
|
||||
| (1) the "documents/index.html" page is generated
|
||||
| (2) the result is written to "build/dest/docs.html"
|
||||
| (3) this works only for "single" pages - and not when links
|
||||
| are followed
|
||||
|
|
||||
| insert:
|
||||
| Insert generated page's URI into the destination
|
||||
| URI at the point marked with a * (example uses fictional
|
||||
| zip protocol)
|
||||
|
|
||||
| <uri type="insert" src-prefix="documents/" src="index.html"
|
||||
| dest="zip://*.zip/page.html"/>
|
||||
|
|
||||
| This means that
|
||||
| (1)
|
||||
|
|
||||
| In any of these scenarios, if the dest attribute is omitted,
|
||||
| the value provided globally using the <dest-dir> node will
|
||||
| be used instead.
|
||||
+-->
|
||||
<!--
|
||||
<uri type="replace"
|
||||
src-prefix="samples/"
|
||||
src="hello-world/hello.html"
|
||||
dest="build/dest/hello-world.html"/>
|
||||
-->
|
||||
|
||||
<!--+
|
||||
| <uri> nodes can be grouped together in a <uris> node. This
|
||||
| enables a group of URIs to share properties. The following
|
||||
| properties can be set for a group of URIs:
|
||||
| * follow-links: should pages be crawled for links
|
||||
| * confirm-extensions: should file extensions be checked
|
||||
| for the correct mime type
|
||||
| * src-prefix: all source URIs should be
|
||||
| pre-pended with this prefix before
|
||||
| generation. The prefix is not
|
||||
| included when calculating the
|
||||
| destination URI
|
||||
| * dest: the base destination URI to be
|
||||
| shared by all pages in this group
|
||||
| * type: the method to be used to calculate
|
||||
| the destination URI. See above
|
||||
| section on <uri> node for details.
|
||||
|
|
||||
| Each <uris> node can have a name attribute. When a name
|
||||
| attribute has been specified, the -n switch on the command
|
||||
| line can be used to tell Cocoon to only process the URIs
|
||||
| within this URI group. When no -n switch is given, all
|
||||
| <uris> nodes are processed. Thus, one xconf file can be
|
||||
| used to manage multiple sites.
|
||||
+-->
|
||||
<!--
|
||||
<uris name="mirrors" follow-links="false">
|
||||
<uri type="append" src="mirrors.html"/>
|
||||
</uris>
|
||||
-->
|
||||
|
||||
<!--+
|
||||
| File containing URIs (plain text, one per line).
|
||||
+-->
|
||||
<!--
|
||||
<uri-file>uris.txt</uri-file>
|
||||
-->
|
||||
</cocoon>
|
|
@ -0,0 +1,3 @@
|
|||
#Forrest generates UTF-8 by default, but these httpd servers are
|
||||
#ignoring the meta http-equiv charset tags
|
||||
AddDefaultCharset off
|
|
@ -1,12 +1,15 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>Apache Lucene - Resources - Performance Benchmarks</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="kelvint@apache.org">Kelvin Tan</author>
|
||||
<title>Resources - Performance Benchmarks</title>
|
||||
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="Performance Benchmarks">
|
||||
<section id="Performance Benchmarks"><title>Performance Benchmarks</title>
|
||||
<p>
|
||||
The purpose of these user-submitted performance figures is to
|
||||
give current and potential users of Lucene a sense
|
||||
|
@ -24,7 +27,7 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Benchmark Variables">
|
||||
<section id="Benchmark Variables"><title>Benchmark Variables</title>
|
||||
<p>
|
||||
<ul>
|
||||
<p>
|
||||
|
@ -90,7 +93,7 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="User-submitted Benchmarks">
|
||||
<section id="User-submitted Benchmarks"><title>User-submitted Benchmarks</title>
|
||||
<p>
|
||||
These benchmarks have been kindly submitted by Lucene users for
|
||||
reference purposes.
|
||||
|
@ -104,7 +107,7 @@
|
|||
these figures to us).
|
||||
</p>
|
||||
|
||||
<subsection name="Hamish Carpenter's benchmarks">
|
||||
<section id="Hamish Carpenter's benchmarks"><title>Hamish Carpenter's benchmarks</title>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br/>
|
||||
|
@ -209,9 +212,9 @@
|
|||
<p>
|
||||
Hamish can be contacted at hamish at catalyst.net.nz.
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Justin Greene's benchmarks">
|
||||
<section id="Justin Greene's benchmarks"><title>Justin Greene's benchmarks</title>
|
||||
<ul>
|
||||
<p>
|
||||
<b>Hardware Environment</b><br/>
|
||||
|
@ -277,10 +280,10 @@
|
|||
<p>
|
||||
Justin can be contacted at tvxh-lw4x at spamex.com.
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
|
||||
<subsection name="Daniel Armbrust's benchmarks">
|
||||
<section id="Daniel Armbrust's benchmarks"><title>Daniel Armbrust's benchmarks</title>
|
||||
<p>
|
||||
My disclaimer is that this is a very poor "Benchmark". It was not done for raw speed,
|
||||
nor was the total index built in one shot. The index was created on several different
|
||||
|
@ -344,8 +347,8 @@
|
|||
<p>
|
||||
Daniel can be contacted at Armbrust.Daniel at mayo.edu.
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Geoffrey Peddle's benchmarks">
|
||||
</section>
|
||||
<section id="Geoffrey Peddle's benchmarks"><title>Geoffrey Peddle's benchmarks</title>
|
||||
<p>
|
||||
I'm doing a technical evaluation of search engines
|
||||
for Ariba, an enterprise application software company.
|
||||
|
@ -515,7 +518,7 @@ A 34 minutes
|
|||
</p>
|
||||
</p>
|
||||
</ul>
|
||||
</subsection>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
</body>
|
|
@ -1,15 +1,18 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Contributions
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="carlson@apache.org">
|
||||
Peter Carlson
|
||||
</author>
|
||||
<title>
|
||||
Contributions - Apache Lucene
|
||||
</title>
|
||||
</properties>
|
||||
<body>
|
||||
<section name="Overview">
|
||||
<section id="Overview">
|
||||
<title>Overview</title>
|
||||
<p>This page lists external Lucene resources. If you have
|
||||
written something that should be included, please post all
|
||||
relevant information to one of the mailing lists. Nothing
|
||||
|
@ -21,14 +24,15 @@
|
|||
<a href="http://wiki.apache.org/jakarta-lucene/HowToContribute">How To Contribute</a> on the Lucene Wiki.</p>
|
||||
</section>
|
||||
|
||||
<section name="Lucene Tools">
|
||||
<section id="Lucene Tools">
|
||||
<title>Lucene Tools</title>
|
||||
<p>
|
||||
Software that works with Lucene indices.
|
||||
</p>
|
||||
<subsection name="Luke">
|
||||
<section id="Luke"><title>Luke</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -38,7 +42,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -46,11 +50,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="LIMO (Lucene Index Monitor)">
|
||||
</section>
|
||||
<section id="LIMO (Lucene Index Monitor)">
|
||||
<title>LIMO (Lucene Index Monitor)</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -60,7 +65,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -68,20 +73,22 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section name="Lucene Document Converters">
|
||||
<section id="Lucene Document Converters">
|
||||
<title>Lucene Document Converters</title>
|
||||
<p>
|
||||
Lucene requires information you want to index to be
|
||||
converted into a Document class. Here are
|
||||
contributions for various solutions that convert different
|
||||
content types to Lucene's Document classes.
|
||||
</p>
|
||||
<subsection name="XML Document #1">
|
||||
<section id="XML Document #1">
|
||||
<title>XML Document #1</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -91,7 +98,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -99,11 +106,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="XML Document #2">
|
||||
</section>
|
||||
<section id="XML Document #2">
|
||||
<title>XML Document #2</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -113,7 +121,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -121,11 +129,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="PDF Box">
|
||||
</section>
|
||||
<section id="PDF Box">
|
||||
<title>PDF Box</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -135,7 +144,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -143,11 +152,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="XPDF - PDF Document Conversion">
|
||||
</section>
|
||||
<section id="XPDF - PDF Document Conversion">
|
||||
<title>XPDF - PDF Document Conversion</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -157,7 +167,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -165,11 +175,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="PDFTextStream -- PDF text and metadata extraction">
|
||||
</section>
|
||||
<section id="PDFTextStream -- PDF text and metadata extraction">
|
||||
<title>PDFTextStream -- PDF text and metadata extraction</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -179,7 +190,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -187,11 +198,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="PJ Classic &amp; PJ Professional - PDF Document Conversion">
|
||||
</section>
|
||||
<section id="PJ Classic &amp; PJ Professional - PDF Document Conversion">
|
||||
<title>PJ Classic &amp; PJ Professional - PDF Document Conversion</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -201,7 +213,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -209,16 +221,18 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section name="Miscellaneous">
|
||||
<section id="Miscellaneous">
|
||||
<title>Miscellaneous</title>
|
||||
<p>
|
||||
</p>
|
||||
<subsection name="Arabic Analyzer for Java">
|
||||
<section id="Arabic Analyzer for Java">
|
||||
<title>Arabic Analyzer for Java</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -228,7 +242,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -236,11 +250,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="Phonetix">
|
||||
</section>
|
||||
<section id="Phonetix">
|
||||
<title>Phonetix</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -250,7 +265,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -258,13 +273,14 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="ejIndex - JBoss MBean for Lucene">
|
||||
</section>
|
||||
<section id="ejIndex - JBoss MBean for Lucene">
|
||||
<title>ejIndex - JBoss MBean for Lucene</title>
|
||||
<p>
|
||||
</p>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -274,7 +290,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -282,11 +298,12 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
<subsection name="JavaCC">
|
||||
</section>
|
||||
<section id="JavaCC">
|
||||
<title>JavaCC</title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
URL
|
||||
</th>
|
||||
<td>
|
||||
|
@ -296,7 +313,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>
|
||||
<th width="%1">
|
||||
author
|
||||
</th>
|
||||
<td>
|
||||
|
@ -304,7 +321,7 @@
|
|||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</subsection>
|
||||
</section>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
|
@ -1,12 +1,16 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Building and Installing the Basic Demo
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Apache Lucene - Building and Installing the Basic Demo</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About this Document">
|
||||
<section id="About this Document"><title>About this Document</title>
|
||||
<p>
|
||||
This document is intended as a "getting started" guide to using and running the Lucene demos.
|
||||
It walks you through some basic installation and configuration.
|
||||
|
@ -14,14 +18,14 @@ It walks you through some basic installation and configuration.
|
|||
</section>
|
||||
|
||||
|
||||
<section name="About the Demos">
|
||||
<section id="About the Demos"><title>About the Demos</title>
|
||||
<p>
|
||||
The Lucene command-line demo code consists of two applications that demonstrate various
|
||||
functionalities of Lucene and how one should go about adding Lucene to their applications.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Setting your CLASSPATH">
|
||||
<section id="Setting your CLASSPATH"><title>Setting your CLASSPATH</title>
|
||||
<p>
|
||||
First, you should <a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">download</a> the
|
||||
latest Lucene distribution and then extract it to a working directory. Alternatively, you can <a
|
||||
|
@ -37,7 +41,7 @@ successfully). Put both of these files in your Java CLASSPATH.
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Indexing Files">
|
||||
<section id="Indexing Files"><title>Indexing Files</title>
|
||||
<p>
|
||||
Once you've gotten this far you're probably itching to go. Let's <b>build an index!</b> Assuming
|
||||
you've set your CLASSPATH correctly, just type:
|
||||
|
@ -63,7 +67,7 @@ you whether you want more results.
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="About the code...">
|
||||
<section id="About the code..."><title>About the code...</title>
|
||||
<p>
|
||||
<a href="demo2.html">read on>>></a>
|
||||
</p>
|
|
@ -1,12 +1,16 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Basic Demo Sources Walk-through
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Apache Lucene - Basic Demo Sources Walk-through</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About the Code">
|
||||
<section id="About the Code"><title>About the Code</title>
|
||||
<p>
|
||||
In this section we walk through the sources behind the command-line Lucene demo: where to find them,
|
||||
their parts and their function. This section is intended for Java developers wishing to understand
|
||||
|
@ -15,7 +19,7 @@ how to use Lucene in their applications.
|
|||
</section>
|
||||
|
||||
|
||||
<section name="Location of the source">
|
||||
<section id="Location of the source"><title>Location of the source</title>
|
||||
|
||||
<p>
|
||||
Relative to the directory created when you extracted Lucene or retrieved it from Subversion, you
|
||||
|
@ -31,7 +35,7 @@ Bring it up in <code>vi</code> or your editor of choice and let's take a look at
|
|||
|
||||
</section>
|
||||
|
||||
<section name="IndexFiles">
|
||||
<section id="IndexFiles"><title>IndexFiles</title>
|
||||
|
||||
<p>
|
||||
As we discussed in the previous walk-through, the <code><a
|
||||
|
@ -95,7 +99,7 @@ complex but builds upon this example.
|
|||
|
||||
</section>
|
||||
|
||||
<section name="Searching Files">
|
||||
<section id="Searching Files"><title>Searching Files</title>
|
||||
|
||||
<p>
|
||||
The <code><a href="api/org/apache/lucene/demo/SearchFiles.html">SearchFiles</a></code> class is
|
||||
|
@ -122,7 +126,7 @@ displayed to the user.
|
|||
|
||||
</section>
|
||||
|
||||
<section name="The Web example...">
|
||||
<section id="The Web example..."><title>The Web example...</title>
|
||||
|
||||
<p>
|
||||
<a href="demo3.html">read on>>></a>
|
|
@ -1,13 +1,17 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Building and Installing the Basic Demo
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Apache Lucene - Building and Installing the Basic Demo</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About this Document">
|
||||
<section id="About this Document"><title>About this Document</title>
|
||||
<p>
|
||||
This document is intended as a "getting started" guide to installing and running the Lucene
|
||||
web application demo. This guide assumes that you have read the information in the previous two
|
||||
|
@ -17,7 +21,7 @@ container, but you may have to adapt them appropriately.
|
|||
</section>
|
||||
|
||||
|
||||
<section name="About the Demos">
|
||||
<section id="About the Demos"><title>About the Demos</title>
|
||||
<p>
|
||||
The Lucene Web Application demo is a template web application intended for deployment on Tomcat or a
|
||||
similar web container. It's NOT designed as a "best practices" implementation by ANY means. It's
|
||||
|
@ -27,7 +31,7 @@ in Tomcat or a similar application server.
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Indexing Files">
|
||||
<section id="Indexing Files"><title>Indexing Files</title>
|
||||
<p> Once you've gotten this far you're probably itching to go. Let's start by creating the index
|
||||
you'll need for the web examples. Since you've already set your CLASSPATH in the previous examples,
|
||||
all you need to do is type:
|
||||
|
@ -44,13 +48,13 @@ outside of a web accessible context. By default the webapp is configured to loo
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Deploying the Demos">
|
||||
<section id="Deploying the Demos"><title>Deploying the Demos</title>
|
||||
<p>Located in your distribution directory you should see a war file called
|
||||
<code>luceneweb.war</code>. If you're working with a Subversion checkout, this will be under the
|
||||
<code>build</code> subdirectory. Copy this to your <code>{tomcat-home}/webapps</code> directory.
|
||||
You may need to restart Tomcat. </p> </section>
|
||||
|
||||
<section name="Configuration">
|
||||
<section id="Configuration"><title>Configuration</title>
|
||||
<p> From your Tomcat directory look in the <code>webapps/luceneweb</code> subdirectory. If it's not
|
||||
present, try browsing to <code>http://localhost:8080/luceneweb</code> (which causes Tomcat to deploy
|
||||
the webapp), then look again. Edit a file called <code>configuration.jsp</code>. Ensure that the
|
||||
|
@ -63,7 +67,7 @@ war file).
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Running the Demos">
|
||||
<section id="Running the Demos"><title>Running the Demos</title>
|
||||
<p>Now you're ready to roll. In your browser set the url to
|
||||
<code>http://localhost:8080/luceneweb</code>, enter <code>test</code> and the number of items per
|
||||
page and press search.</p>
|
||||
|
@ -74,7 +78,7 @@ path in <code>configuration.jsp</code> incorrectly or Tomcat doesn't have permis
|
|||
per page you set and results returned, there may be a link at the bottom that says <b>More
|
||||
Results>></b>; clicking it takes you to subsequent pages. </p> </section>
|
||||
|
||||
<section name="About the code...">
|
||||
<section id="About the code..."><title>About the code...</title>
|
||||
<p>
|
||||
If you want to know more about how this web app works or how to customize it then <a
|
||||
href="demo4.html">read on>>></a>.
|
|
@ -1,12 +1,16 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Basic Demo Sources Walkthrough
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Apache Lucene - Basic Demo Sources Walkthrough</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About the Code">
|
||||
<section id="About the Code"><title>About the Code</title>
|
||||
<p>
|
||||
In this section we walk through the sources behind the basic Lucene Web Application demo: where to
|
||||
find them, their parts and their function. This section is intended for Java developers wishing to
|
||||
|
@ -16,7 +20,7 @@ applications based on Lucene.
|
|||
</section>
|
||||
|
||||
|
||||
<section name="Location of the source (developers/deployers)">
|
||||
<section id="Location of the source (developers/deployers)"><title>Location of the source (developers/deployers)</title>
|
||||
<p>
|
||||
Relative to the directory created when you extracted Lucene or retrieved it from Subversion, you
|
||||
should see a directory called <code>src</code> which in turn contains a directory called
|
||||
|
@ -28,7 +32,7 @@ choice.
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="index.jsp (developers/deployers)">
|
||||
<section id="index.jsp (developers/deployers)"><title>index.jsp (developers/deployers)</title>
|
||||
<p>
|
||||
This jsp page is pretty boring by itself. All it does is include a header, display a form and
|
||||
include a footer. If you look at the form, it has two fields: <code>query</code> (where you enter
|
||||
|
@ -39,7 +43,7 @@ file. You could simply change the header and footer. Let's look at the <code>h
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="header.jsp (developers/deployers)">
|
||||
<section id="header.jsp (developers/deployers)"><title>header.jsp (developers/deployers)</title>
|
||||
<p>
|
||||
The header is also very simple by itself. The only thing it does is include the
|
||||
<code>configuration.jsp</code> (which you looked at in the last section of this guide) and set the
|
||||
|
@ -49,7 +53,7 @@ Let's look at the <code>results.jsp</code>, the meat of this application, next.
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="results.jsp (developers)">
|
||||
<section id="results.jsp (developers)"><title>results.jsp (developers)</title>
|
||||
<p>
|
||||
Most of the functionality lies in <code>results.jsp</code>. Much of it is for paging the search
|
||||
results, which we'll not cover here as it's commented well enough. The first thing in this page is
|
||||
|
@ -108,7 +112,7 @@ share them across search requests, instead of re-instantiating per search reques
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="More sources (developers)">
|
||||
<section id="More sources (developers)"><title>More sources (developers)</title>
|
||||
<p>
|
||||
There are additional sources used by the web app that were not specifically covered by either
|
||||
walkthrough. For example the HTML parser, the <code><a
|
||||
|
@ -120,7 +124,7 @@ started" with Lucene.
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Where to go from here? (everyone!)">
|
||||
<section id="Where to go from here? (everyone!)"><title>Where to go from here? (everyone!)</title>
|
||||
<p>
|
||||
There are a number of things this demo doesn't do or doesn't do quite right. For instance, you may
|
||||
have noticed that documents in the root context are unreachable (unless you reconfigure Tomcat to
|
||||
|
@ -139,7 +143,7 @@ Users' or Developers' <a href="mailinglists.html">mailing lists</a>!
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="When to contact the Author">
|
||||
<section id="When to contact the Author"><title>When to contact the Author</title>
|
||||
<p>
|
||||
Please resist the urge to contact the authors of this document (without bribes of fame and fortune
|
||||
attached). First contact the <a href="mailinglists.html">mailing lists</a>, taking care to <a
|
|
@ -1,15 +1,15 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<properties>
|
||||
<title>Features</title>
|
||||
</properties>
|
||||
<header>
|
||||
<title>Apache Lucene - Features</title>
|
||||
</header>
|
||||
<body>
|
||||
|
||||
<section name="Features">
|
||||
<section id="Features"><title>Features</title>
|
||||
<p>Lucene offers powerful features through a simple API:</p>
|
||||
</section>
|
||||
|
||||
<section name="Scalable, High-Performance Indexing">
|
||||
<section id="Scalable, High-Performance Indexing"><title>Scalable, High-Performance Indexing</title>
|
||||
<ul>
|
||||
<li>over 20MB/minute on Pentium M 1.5GHz<br/></li>
|
||||
<li>small RAM requirements -- only 1MB heap</li>
|
||||
|
@ -18,7 +18,7 @@
|
|||
</ul>
|
||||
</section>
|
||||
|
||||
<section name="Powerful, Accurate and Efficient Search Algorithms">
|
||||
<section id="Powerful, Accurate and Efficient Search Algorithms"><title>Powerful, Accurate and Efficient Search Algorithms</title>
|
||||
<ul>
|
||||
<li>ranked searching -- best results returned first</li>
|
||||
<li>many powerful query types: phrase queries, wildcard queries, proximity
|
||||
|
@ -31,13 +31,13 @@
|
|||
</ul>
|
||||
</section>
|
||||
|
||||
<section name="Cross-Platform Solution">
|
||||
<section id="Cross-Platform Solution"><title>Cross-Platform Solution</title>
|
||||
<ul>
|
||||
<li>Available as Open Source software under the
|
||||
<a href="http://www.apache.org/licenses/LICENSE-2.0.html">Apache License</a>
|
||||
which lets you use Lucene in both commercial and Open Source programs</li>
|
||||
<li>100%-pure Java</li>
|
||||
<li>implementations <a href="http://wiki.apache.org/jakarta-lucene/LuceneImplementations">in other
|
||||
<li>Implementations <a href="http://wiki.apache.org/jakarta-lucene/LuceneImplementations">in other
|
||||
programming languages available</a> that are index-compatible</li>
|
||||
</ul>
|
||||
</section>
|
|
@ -1,20 +1,24 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Index File Formats
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<title>Index File Formats</title>
|
||||
|
||||
<authors>
|
||||
<person email="cutting@apache.org" name="Doug Cutting"/>
|
||||
</authors>
|
||||
</properties>
|
||||
|
||||
<body>
|
||||
<section name="Index File Formats">
|
||||
|
||||
<section id="Index File Formats">
|
||||
<title>Index File Formats</title>
|
||||
<p>
|
||||
This document defines the index file formats used
|
||||
in Lucene version 2.1. If you are using a different
|
||||
in Lucene version 2.0. If you are using a different
|
||||
version of Lucene, please consult the copy of
|
||||
<code>docs/fileformats.html</code> that was distributed
|
||||
with the version you are using.
|
||||
|
@ -43,22 +47,10 @@
|
|||
describing how file formats have changed from prior versions.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In version 2.1, the file format was changed to allow
|
||||
lock-less commits (i.e., no more commit lock). The
|
||||
change is fully backwards compatible: you can open a
|
||||
pre-2.1 index for searching or adding/deleting of
|
||||
docs. When the new segments file is saved
|
||||
(committed), it will be written in the new file format
|
||||
(meaning no specific "upgrade" process is needed).
|
||||
But note that once a commit has occurred, pre-2.1
|
||||
Lucene will not be able to read the index.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Definitions">
|
||||
|
||||
<section id="Definitions">
|
||||
<title>Definitions</title>
|
||||
<p>
|
||||
The fundamental concepts in Lucene are index,
|
||||
document, field and term.
|
||||
|
@ -94,8 +86,8 @@
|
|||
within the field.
|
||||
</p>
|
||||
|
||||
<subsection name="Inverted Indexing">
|
||||
|
||||
<section id="Inverted Indexing">
|
||||
<title>Inverted Indexing</title>
|
||||
<p>
|
||||
The index stores statistics about terms in order
|
||||
to make term-based search more efficient. Lucene's
|
||||
|
@ -104,9 +96,9 @@
|
|||
it. This is the inverse of the natural relationship, in which
|
||||
documents list terms.
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Types of Fields">
|
||||
|
||||
</section>
|
||||
<section id="Types of Fields">
|
||||
<title>Types of Fields</title>
|
||||
<p>
|
||||
In Lucene, fields may be <i>stored</i>, in which
|
||||
case their text is stored in the index literally, in a non-inverted
|
||||
|
@ -120,10 +112,10 @@
|
|||
to be indexed literally.
|
||||
</p>
|
||||
<p>See the <a href="http://lucene.apache.org/java/docs/api/org/apache/lucene/document/Field.html">Field</a> Javadocs for more information on Fields.</p>
|
||||
</subsection>
|
||||
|
||||
<subsection name="Segments">
|
||||
</section>
|
||||
|
||||
<section id="Segments">
|
||||
<title>Segments</title>
|
||||
<p>
|
||||
Lucene indexes may be composed of multiple sub-indexes, or<i>
|
||||
segments</i>. Each segment is a fully independent index, which could be searched
|
||||
|
@ -141,10 +133,10 @@
|
|||
Searches may involve multiple segments and/or multiple indexes, each
|
||||
index potentially composed of a set of segments.
|
||||
</p>
|
||||
</subsection>
|
||||
|
||||
<subsection name="Document Numbers">
|
||||
</section>
|
||||
|
||||
<section id="Document Numbers">
|
||||
<title>Document Numbers</title>
|
||||
<p>
|
||||
Internally, Lucene refers to documents by an integer <i>document
|
||||
number</i>. The first document added to an index is numbered zero, and each
|
||||
|
@ -191,12 +183,12 @@
|
|||
</li>
|
||||
</ul>
|
||||
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Overview">
|
||||
|
||||
<section id="Overview">
|
||||
<title>Overview</title>
|
||||
<p>
|
||||
Each segment index maintains the following:
|
||||
</p>
|
||||
|
@ -257,8 +249,8 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="File Naming">
|
||||
|
||||
<section id="File Naming">
|
||||
<title>File Naming</title>
|
||||
<p>
|
||||
All files belonging to a segment have the same name with varying
|
||||
extensions. The extensions correspond to the different file formats
|
||||
|
@ -272,24 +264,12 @@
|
|||
required.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
As of version 2.1 (lock-less commits), file names are
|
||||
never re-used (there is one exception, "segments.gen",
|
||||
see below). That is, when any file is saved to the
|
||||
Directory it is given a never before used filename.
|
||||
This is achieved using a simple generations approach.
|
||||
For example, the first segments file is segments_1,
|
||||
then segments_2, etc. The generation is a sequential
|
||||
long integer represented in alpha-numeric (base 36)
|
||||
form.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Primitive Types">
|
||||
|
||||
<subsection name="Byte">
|
||||
|
||||
<section id="Primitive Types">
|
||||
<title>Primitive Types</title>
|
||||
<section id="Byte">
|
||||
<title>Byte</title>
|
||||
<p>
|
||||
The most primitive type
|
||||
is an eight-bit byte. Files are accessed as sequences of bytes. All
|
||||
|
@ -297,10 +277,10 @@
|
|||
of bytes, so file formats are byte-order independent.
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="UInt32">
|
||||
</section>
|
||||
|
||||
<section id="UInt32">
|
||||
<title>UInt32</title>
|
||||
<p>
|
||||
32-bit unsigned integers are written as four
|
||||
bytes, high-order bytes first.
|
||||
|
@ -309,10 +289,10 @@
|
|||
UInt32 --> &lt;Byte&gt;<sup>4</sup>
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="Uint64">
|
||||
</section>
|
||||
|
||||
<section id="Uint64">
|
||||
<title>Uint64</title>
|
||||
<p>
|
||||
64-bit unsigned integers are written as eight
|
||||
bytes, high-order bytes first.
|
||||
|
@ -321,10 +301,10 @@
|
|||
<p>UInt64 --> &lt;Byte&gt;<sup>8</sup>
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="VInt">
|
||||
</section>
|
||||
|
||||
<section id="VInt">
|
||||
<title>VInt</title>
|
||||
<p>
|
||||
A variable-length format for positive integers is
|
||||
defined where the high-order bit of each byte indicates whether more
|
||||
|
@ -681,10 +661,10 @@
|
|||
efficient to decode.
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="Chars">
|
||||
</section>
|
||||
|
||||
<section id="Chars">
|
||||
<title>Chars</title>
|
||||
<p>
|
||||
Lucene writes unicode
|
||||
character sequences using Java's
|
||||
|
@ -693,10 +673,10 @@
|
|||
</p>
|
||||
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="String">
|
||||
</section>
|
||||
|
||||
<section id="String">
|
||||
<title>String</title>
|
||||
<p>
|
||||
Lucene writes strings as a VInt representing the length, followed by
|
||||
the character data.
|
||||
|
@ -706,62 +686,36 @@
|
|||
String --> VInt, Chars
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Per-Index Files">
|
||||
|
||||
<section id="Per-Index Files">
|
||||
<title>Per-Index Files</title>
|
||||
<p>
|
||||
The files in this section exist one-per-index.
|
||||
</p>
|
||||
|
||||
<subsection name="Segments File">
|
||||
|
||||
<section id="Segments File">
|
||||
<title>Segments File</title>
|
||||
<p>
|
||||
The active segments in the index are stored in the
|
||||
segment info file, <tt>segments_N</tt>. There may
|
||||
be one or more <tt>segments_N</tt> files in the
|
||||
index; however, the one with the largest
|
||||
generation is the active one (when older
|
||||
segments_N files are present it's because they
|
||||
temporarily cannot be deleted, or, a writer is in
|
||||
the process of committing). This file lists each
|
||||
segment by name, has details about the separate
|
||||
norms and deletion files, and also contains the
|
||||
size of each segment.
|
||||
segment info file. An index only has
|
||||
a single file in this format, and it is named "segments".
|
||||
This lists each segment by name, and also contains the size of each
|
||||
segment.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
As of 2.1, there is also a file
|
||||
<tt>segments.gen</tt>. This file contains the
|
||||
current generation (the <tt>_N</tt> in
|
||||
<tt>segments_N</tt>) of the index. This is
|
||||
used only as a fallback in case the current
|
||||
generation cannot be accurately determined by
|
||||
directory listing alone (as is the case for some
|
||||
NFS clients with time-based directory cache
|
||||
expiration). This file simply contains an Int32
|
||||
version header (SegmentInfos.FORMAT_LOCKLESS =
|
||||
-2), followed by the generation recorded as Int64,
|
||||
written twice.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<b>Pre-2.1:</b>
|
||||
Segments --> Format, Version, NameCounter, SegCount, &lt;SegName, SegSize&gt;<sup>SegCount</sup>
|
||||
</p>
|
||||
<p>
|
||||
<b>2.1 and above:</b>
|
||||
Segments --> Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, NumField, NormGen<sup>NumField</sup> &gt;<sup>SegCount</sup>, IsCompoundFile
|
||||
|
||||
<p>
|
||||
Format, NameCounter, SegCount, SegSize --> UInt32
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Format, NameCounter, SegCount, SegSize, NumField --> Int32
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Version, DelGen, NormGen --> Int64
|
||||
Version --> UInt64
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
@ -769,11 +723,7 @@
|
|||
</p>
|
||||
|
||||
<p>
|
||||
IsCompoundFile --> Int8
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Format is -1 as of Lucene 1.4 and -2 as of Lucene 2.1.
|
||||
Format is -1 in Lucene 1.4.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
@ -794,83 +744,69 @@
|
|||
SegSize is the number of documents contained in the segment index.
|
||||
</p>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
<section id="Lock Files">
|
||||
<title>Lock Files</title>
|
||||
<p>
|
||||
DelGen is the generation count of the separate
|
||||
deletes file. If this is -1, there are no
|
||||
separate deletes. If it is 0, this is a pre-2.1
|
||||
segment and you must check filesystem for the
|
||||
existence of _X.del. Anything above zero means
|
||||
there are separate deletes (_X_N.del).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
NumField is the size of the array for NormGen, or
|
||||
-1 if there are no NormGens stored.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
NormGen records the generation of the separate
|
||||
norms files. If NumField is -1, there are no
|
||||
normGens stored and they are all assumed to be 0
|
||||
when the segment file was written pre-2.1 and all
|
||||
assumed to be -1 when the segments file is 2.1 or
|
||||
above. The generation then has the same meaning
|
||||
as delGen (above).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
IsCompoundFile records whether the segment is
|
||||
written as a compound file or not. If this is -1,
|
||||
the segment is not a compound file. If it is 1,
|
||||
the segment is a compound file. Else it is 0,
|
||||
which means we check filesystem to see if _X.cfs
|
||||
exists.
|
||||
</p>
|
||||
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="Lock File">
|
||||
|
||||
<p>
|
||||
A write lock is used to indicate that another
|
||||
process is writing to the index. Note that this file is not
|
||||
Several files are used to indicate that another
|
||||
process is using an index. Note that these files are not
|
||||
stored in the index directory itself, but rather in the
|
||||
system's temporary directory, as indicated in the Java
|
||||
system property "java.io.tmpdir".
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<p>
|
||||
When a file named "commit.lock"
|
||||
is present, a process is currently re-writing the "segments"
|
||||
file and deleting outdated segment index files, or a process is
|
||||
reading the "segments"
|
||||
file and opening the files of the segments it names. This lock file
|
||||
prevents files from being deleted by another process after a process
|
||||
has read the "segments"
|
||||
file but before it has managed to open all of the files of the
|
||||
segments named therein.
|
||||
</p>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<p>
|
||||
When a file named "write.lock"
|
||||
is present, a process is currently adding documents to an index, or
|
||||
removing files from that index. This lock file prevents several
|
||||
processes from attempting to modify an index at the same time.
|
||||
</p>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section id="Deletable File">
|
||||
<title>Deletable File</title>
|
||||
<p>
|
||||
The write lock is named "XXXX-write.lock" where
|
||||
XXXX is typically a unique prefix computed by the
|
||||
directory path to the index. When this file is
|
||||
present, a process is currently adding documents
|
||||
to an index, or removing files from that index.
|
||||
This lock file prevents several processes from
|
||||
attempting to modify an index at the same time.
|
||||
A file named "deletable"
|
||||
contains the names of files that are no longer used by the index, but
|
||||
which could not be deleted. This is only used on Win32, where a
|
||||
file may not be deleted while it is still open. On other platforms
|
||||
the file contains only null bytes.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Note that prior to version 2.1, Lucene also used a
|
||||
commit lock. This was removed in 2.1.
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
|
||||
<subsection name="Deletable File">
|
||||
|
||||
<p>
|
||||
Prior to Lucene 2.1 there was a file "deletable"
|
||||
that contained details about files that need to be
|
||||
deleted. As of 2.1, a writer dynamically computes
|
||||
the files that are deletable, instead, so no file
|
||||
is written.
|
||||
Deletable --> DeletableCount,
|
||||
<DeletableName><sup>DeletableCount</sup>
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
<p>DeletableCount --> UInt32
|
||||
</p>
|
||||
<p>DeletableName -->
|
||||
String
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<subsection name="Compound Files">
|
||||
|
||||
<section id="Compound Files">
|
||||
<title>Compound Files</title>
|
||||
<p>Starting with Lucene 1.4 the compound file format became default. This
|
||||
is simply a container for all files described in the next section.</p>
|
||||
|
||||
|
@ -886,17 +822,18 @@
|
|||
<p>FileData --> raw file data</p>
|
||||
<p>The raw file data is the data from the individual files named above.</p>
|
||||
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Per-Segment Files">
|
||||
|
||||
<section id="Per-Segment Files">
|
||||
<title>Per-Segment Files</title>
|
||||
<p>
|
||||
The remaining files are all per-segment, and are
|
||||
thus defined by suffix.
|
||||
</p>
|
||||
<subsection name="Fields">
|
||||
<section id="Fields">
|
||||
<title>Fields</title>
|
||||
<p><br/><b>Field Info</b><br/></p>
|
||||
|
||||
<p>
|
||||
|
@ -1039,9 +976,9 @@
|
|||
</li>
|
||||
</ol>
|
||||
|
||||
</subsection>
|
||||
<subsection name="Term Dictionary">
|
||||
|
||||
</section>
|
||||
<section id="Term Dictionary">
|
||||
<title>Term Dictionary</title>
|
||||
<p>
|
||||
The term dictionary is represented as two files:
|
||||
</p>
|
||||
|
@ -1173,10 +1110,10 @@
|
|||
accelerable cases.</p>
|
||||
</li>
|
||||
</ol>
|
||||
</subsection>
|
||||
|
||||
<subsection name="Frequencies">
|
||||
</section>
|
||||
|
||||
<section id="Frequencies">
|
||||
<title>Frequencies</title>
|
||||
<p>
|
||||
The .frq file contains the lists of documents
|
||||
which contain each term, along with the frequency of the term in that
|
||||
|
@ -1245,9 +1182,9 @@
|
|||
bytes after that that the 32<sup>nd</sup> starts.
|
||||
</p>
|
||||
|
||||
</subsection>
|
||||
<subsection name="Positions">
|
||||
|
||||
</section>
|
||||
<section id="Positions">
|
||||
<title>Positions</title>
|
||||
<p>
|
||||
The .prx file contains the lists of positions that
|
||||
each term occurs at within documents.
|
||||
|
@ -1285,8 +1222,9 @@
|
|||
<p> 4,
|
||||
5, 4
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Normalization Factors">
|
||||
</section>
|
||||
<section id="Normalization Factors">
|
||||
<title>Normalization Factors</title>
|
||||
<p>There's a norm file for each indexed field with a byte for
|
||||
each document. The .f[0-9]* file contains,
|
||||
for each document, a byte that encodes a value that is multiplied
|
||||
|
@ -1326,8 +1264,9 @@
|
|||
</li>
|
||||
</ol>
|
||||
|
||||
</subsection>
|
||||
<subsection name="Term Vectors">
|
||||
</section>
|
||||
<section id="Term Vectors">
|
||||
<title>Term Vectors</title>
|
||||
Term Vector support is optional on a field by field basis. It consists of 4
|
||||
files.
|
||||
<ol>
|
||||
|
@ -1380,9 +1319,10 @@
|
|||
</p>
|
||||
</li>
|
||||
</ol>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Deleted Documents">
|
||||
<section id="Deleted Documents">
|
||||
<title>Deleted Documents</title>
|
||||
|
||||
<p>The .del file is
|
||||
optional, and only exists when a segment contains deletions:
|
||||
|
@ -1417,11 +1357,11 @@
|
|||
Bits contains two bytes, 0x00 and 0x02, then document 9 is marked as
|
||||
deleted.
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section name="Limitations">
|
||||
|
||||
<section id="Limitations">
|
||||
<title>Limitations</title>
|
||||
<p>There
|
||||
are a few places where these file formats limit the maximum number of
|
||||
terms and documents to a 32-bit quantity, or to approximately 4
|
|
@ -1,12 +1,17 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Getting Started Guide
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="acoliver@apache.org">Andrew C. Oliver</author>
|
||||
<title>Apache Lucene - Getting Started Guide</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="Getting Started">
|
||||
<section id="Getting Started">
|
||||
<title>Getting Started</title>
|
||||
<p>
|
||||
This document is intended as a "getting started" guide. It has three audiences: first-time users
|
||||
looking to install Apache Lucene in their application or web server; developers looking to modify or base
|
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB |
After Width: | Height: | Size: 3.6 KiB |
Before Width: | Height: | Size: 43 KiB After Width: | Height: | Size: 43 KiB |
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 4.3 KiB After Width: | Height: | Size: 4.3 KiB |
Before Width: | Height: | Size: 696 B After Width: | Height: | Size: 696 B |
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 515 B After Width: | Height: | Size: 515 B |
Before Width: | Height: | Size: 895 B After Width: | Height: | Size: 895 B |
Before Width: | Height: | Size: 1.2 KiB After Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB |
Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.8 KiB |
|
@ -0,0 +1,186 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Overview
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="jon at latchkey.com">Jon S. Stevens</author>
|
||||
<author email="husted at apache.org">Ted Husted</author>
|
||||
<author email="cutting at apache.org">Doug Cutting</author>
|
||||
<author email="carlson at apache.org">Peter Carlson</author>
|
||||
</properties>
|
||||
<body>
|
||||
<section id="Apache Lucene">
|
||||
<title>Apache Lucene</title>
|
||||
<p>
|
||||
Apache Lucene is a high-performance, full-featured text search engine
|
||||
library written entirely in Java. It is a technology suitable for nearly any
|
||||
application that requires full-text search, especially cross-platform.
|
||||
</p>
|
||||
<p>
|
||||
Apache Lucene is an open source project available for
|
||||
<a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">free download</a>.
|
||||
Please use the links on the left to access Lucene.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section id="Lucene News">
|
||||
<title>Lucene News</title>
|
||||
<section><title>10 November 2006</title>
|
||||
<p>New <a href="http://forrest.apache.org">Forrest</a> based site released. The Lucene Java website now has a consistent look and feel with its <a href="http://lucene.apache.org">Lucene</a> siblings.
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>26 May 2006 - Release 2.0.0 available</title>
|
||||
|
||||
<p>This is mostly a bugfix release from release 1.9.1.
|
||||
Note however that deprecated 1.x features have now
|
||||
been removed. Any code that compiles against Lucene
|
||||
1.9.1 without deprecation warnings should work without
|
||||
further changes with any 2.x release. For more
|
||||
information about this release, please read
|
||||
<a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_2_0_0/CHANGES.txt">
|
||||
CHANGES.txt</a>
|
||||
.
|
||||
</p>
|
||||
|
||||
<p>Binary and source distributions are
|
||||
available
|
||||
<a
|
||||
href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>
|
||||
.
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>2 March 2006 - Release 1.9.1 available</title>
|
||||
|
||||
<p>This fixes a serious bug in release 1.9-final.
|
||||
<a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_9_1/CHANGES.txt">
|
||||
CHANGES.txt</a>
|
||||
for details.
|
||||
</p>
|
||||
|
||||
<p>Binary and source distributions are
|
||||
available
|
||||
<a
|
||||
href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>
|
||||
.
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>27 February 2006 - 1.9 final available</title>
|
||||
|
||||
<p>This release has many improvements since release
|
||||
1.4.3, including new features, performance
|
||||
improvements, bug fixes, etc. See
|
||||
<a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_9_final/CHANGES.txt">
|
||||
CHANGES.txt</a>
|
||||
for details.
|
||||
</p>
|
||||
|
||||
<p>1.9 will be the last 1.x release. It is both
|
||||
back-compatible with 1.4.3 and forward-compatible with
|
||||
the upcoming 2.0 release. Many methods and classes in
|
||||
1.4.3 have been deprecated in 1.9 and will be removed
|
||||
in 2.0. Applications must compile against 1.9 without
|
||||
deprecation warnings before they are compatible with
|
||||
2.0.</p>
|
||||
|
||||
<p>Binary and source distributions are
|
||||
available
|
||||
<a
|
||||
href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>
|
||||
.
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>26 January 2006 - Nightly builds available</title>
|
||||
|
||||
<p>Nightly builds of the current development version of Lucene, to be released as Lucene 1.9,
|
||||
are now available at
|
||||
<a href="http://cvs.apache.org/dist/lucene/java/nightly/">
|
||||
http://cvs.apache.org/dist/lucene/java/nightly/</a>
|
||||
.
|
||||
</p>
|
||||
|
||||
</section>
<section>
<title>28 October 2005 - Lucene at ApacheCon</title>
|
||||
<p>
|
||||
<a href="http://www.apachecon.com">
|
||||
<img src="http://apachecon.com/2005/US/logos/Conference135x59.jpg"/>
|
||||
</a>
|
||||
</p>
|
||||
<p>Monday, December 12, 2005 at 3pm by Grant Ingersoll:
|
||||
<br/>
|
||||
Abstract:
|
||||
<br/>
|
||||
Lucene is a high performance, scalable, cross-platform search engine that contains many advanced
|
||||
features that often go untapped by the majority of users. In this session, designed for those
|
||||
familiar with Lucene, we will examine some of Lucene's more advanced topics and their application,
|
||||
including:
|
||||
</p>
|
||||
<ol>
|
||||
<li>Term Vectors: Manual and Pseudo relevance feedback; Advanced document collection analysis for
|
||||
domain specialization</li>
|
||||
<li>Span Queries: Better phrase matching; Candidate Identification for Question Answering</li>
|
||||
<li>Tying it all Together: Building a search framework for experimentation and rapid deployment</li>
|
||||
<li>Case Studies from
|
||||
<a href="http://www.cnlp.org">CNLP</a>
|
||||
: Crosslingual/multilingual retrieval in Arabic, English and Dutch;
|
||||
Sublanguage specialization for commercial trouble ticket analysis; Passage retrieval and
|
||||
analysis for Question Answering application
|
||||
</li>
|
||||
</ol>
|
||||
<p>Topics 1 through 3 will provide technical details on implementing the advanced Lucene features, while
|
||||
the fourth topic will provide a broader context for understanding when and where to use these
|
||||
features.
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>14 February 2005 - Lucene moves to Apache top-level</title>
|
||||
|
||||
<p>Lucene has migrated from Apache's Jakarta project to the top-level. Along with this migration,
|
||||
the source code repository has been converted to Subversion. The migration is in progress with
|
||||
some loose ends. Please stay tuned!
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>December 2004 -
|
||||
<em>Lucene in Action</em>
|
||||
is published
|
||||
</title>
|
||||
|
||||
<a href="http://www.lucenebook.com/">
|
||||
<img border="0" align="left"
|
||||
src="images/lia_3d.jpg"/>
|
||||
</a>
|
||||
<p>The first book dedicated solely to Lucene is published. The
|
||||
"search inside the book" feature implemented with Lucene can
|
||||
be seen at
|
||||
<a href="http://www.lucenebook.com/">lucenebook.com</a>
|
||||
.
|
||||
</p>
|
||||
</section>
|
||||
<p style="clear: both;"/>
|
||||
<section>
|
||||
<title>29 November 2004 - Lucene 1.4.3 Released</title>
|
||||
|
||||
<p>This fixes a few bugs in 1.4.2. See
|
||||
<a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_4_3/CHANGES.txt">
|
||||
CHANGES.txt</a>
|
||||
for details. Binary and source distributions are
|
||||
available
|
||||
<a href="http://www.apache.org/dyn/closer.cgi/lucene/">here</a>
|
||||
. After choosing your mirror, navigate to the archive section via the java link.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
|
@ -1,12 +1,16 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Lucene Sandbox
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author>Otis Gospodnetic</author>
|
||||
<title>Lucene Sandbox</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="Lucene Sandbox">
|
||||
<section id="Lucene Sandbox"><title>Lucene Sandbox</title>
|
||||
<p>
|
||||
Lucene project also contains a workspace, Lucene Sandbox, that is open to all Lucene committers, as well
|
||||
as a few other developers. The purpose of the Sandbox is to host various third party contributions,
|
||||
|
@ -21,7 +25,7 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/">http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/</a>.
|
||||
</p>
|
||||
|
||||
<subsection name="Snowball Stemmers for Lucene">
|
||||
<section id="Snowball Stemmers for Lucene"><title>Snowball Stemmers for Lucene</title>
|
||||
<p>
|
||||
This project provides pre-compiled versions of the Snowball stemmers
|
||||
for Lucene.
|
||||
|
@ -36,9 +40,9 @@
|
|||
<a href="http://snowball.tartarus.org/">Background information on Snowball</a>,
|
||||
which is a language for stemmers developed by Martin Porter.
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Analyzers, Tokenizers, Filters">
|
||||
<section id="Analyzers, Tokenizers, Filters"><title>Analyzers, Tokenizers, Filters</title>
|
||||
<p>
|
||||
Contributed Analyzers, Tokenizers, and Filters for various languages.
|
||||
</p>
|
||||
|
@ -47,9 +51,9 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/analyzers/">The
|
||||
repository for the Analyzers contribution.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Ant">
|
||||
<section id="Ant"><title>Ant</title>
|
||||
<p>
|
||||
The Ant project is a useful Ant task that creates a Lucene index out of an Ant fileset. It also
|
||||
contains an example HTML parser that uses JTidy.
|
||||
|
@ -58,9 +62,9 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/ant/">The
|
||||
repository for the Ant contribution.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="WordNet/Synonyms">
|
||||
<section id="WordNet/Synonyms"><title>WordNet/Synonyms</title>
|
||||
<p>
|
||||
The Lucene WordNet code consists of a single class which parses a prolog file
|
||||
from the WordNet site that contains a list of English words and synonyms.
|
||||
|
@ -77,9 +81,9 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/wordnet/">The
|
||||
repository for the WordNet module.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Lucli - Lucene Command-line Interface">
|
||||
<section id="Lucli - Lucene Command-line Interface"><title>Lucli - Lucene Command-line Interface</title>
|
||||
<p>
|
||||
The Lucli application allows index manipulation from the
|
||||
command-line.
|
||||
|
@ -89,18 +93,18 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/lucli/">The
|
||||
repository for the Lucli contribution.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Term Highlighter">
|
||||
<section id="Term Highlighter"><title>Term Highlighter</title>
|
||||
<p>
|
||||
A small set of classes for highlighting matching terms in
|
||||
search results.
|
||||
</p>
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/highlighter/">The
|
||||
repository for the Highlighter contribution.</a>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Javascript Query Constructor">
|
||||
<section id="Javascript Query Constructor"><title>Javascript Query Constructor</title>
|
||||
<p>
|
||||
Javascript library to support client-side query-building. Provides support for a user interface similar to
|
||||
<a href="http://www.google.com.sg/advanced_search">Google's Advanced Search</a>.
|
||||
|
@ -110,9 +114,9 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/javascript/queryConstructor/">The
|
||||
repository for the Javascript Query Constructor files.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="Javascript Query Validator">
|
||||
<section id="Javascript Query Validator"><title>Javascript Query Validator</title>
|
||||
<p>
|
||||
Javascript library to support client-side query validation. Lucene doesn't like malformed queries and tends to
|
||||
throw ParseException, which are often difficult to interpret and pass on to the user. This library hopes to
|
||||
|
@ -123,9 +127,9 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/javascript/queryValidator/">The
|
||||
repository for the Javascript Query Validator files.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="High Frequency Terms">
|
||||
<section id="High Frequency Terms"><title>High Frequency Terms</title>
|
||||
<p>
|
||||
The miscellaneous package is for classes that don't fit anywhere else. The only class in it right now determines
|
||||
what terms occur the most inside a Lucene index. This could be useful for analyzing which terms may need to go
|
||||
|
@ -136,7 +140,7 @@
|
|||
<a href="http://svn.apache.org/repos/asf/lucene/java/trunk/contrib/miscellaneous/">The
|
||||
repository for miscellaneous classes.</a>
|
||||
</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<properties>
|
||||
<title>Apache Lucene - Mailing Lists</title>
|
||||
</properties>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Mailing Lists
|
||||
</title>
|
||||
</header>
|
||||
<body>
|
||||
<section name="Java User List">
|
||||
<section id="Java User List"><title>Java User List</title>
|
||||
<p>
|
||||
This list is for users of Java Lucene to ask questions, share knowledge,
|
||||
and discuss issues.
|
||||
|
@ -19,7 +21,7 @@
|
|||
</ul>
|
||||
</section>
|
||||
|
||||
<section name="Java Developer List">
|
||||
<section id="Java Developer List"><title>Java Developer List</title>
|
||||
<p>
|
||||
This is the list where participating developers of the Java Lucene project meet
|
||||
and discuss issues, code changes/additions, etc. Do not send mail to this list
|
||||
|
@ -34,7 +36,7 @@
|
|||
(<a href="http://mail-archives.apache.org/mod_mbox/jakarta-lucene-dev/">old archive</a>)</li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/java-dev/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
</ul><br/>
|
||||
Commit notifications:
|
||||
<ul>
|
||||
<li><a href="mailto:java-commits-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
|
@ -44,10 +46,10 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Lucene4c Developer List">
|
||||
<section id="Lucene4c Developer List"><title>Lucene4c Developer List</title>
|
||||
<p>
|
||||
This is the list where participating developers of the lucene4c
|
||||
project meet and disucss issues related to development of
|
||||
project meet and discuss issues related to development of
|
||||
lucene4c. Do not send mail to this list with usage or
|
||||
configuration questions and problems.
|
||||
</p>
|
||||
|
@ -59,7 +61,7 @@
|
|||
<li><a href="http://mail-archives.apache.org/mod_mbox/lucene-c-dev/">Archive</a></li>
|
||||
<li><a href="http://www.gossamer-threads.com/lists/lucene/c-dev/">Alternative
|
||||
archive with search feature</a></li>
|
||||
</ul>
|
||||
</ul><br/>
|
||||
Commit notifications:
|
||||
<ul>
|
||||
<li><a href="mailto:c-commits-subscribe@lucene.apache.org">Subscribe</a></li>
|
||||
|
@ -69,7 +71,7 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Ruby Developer List">
|
||||
<section id="Ruby Developer List"><title>Ruby Developer List</title>
|
||||
<p>
|
||||
Discussion list for developers of Ruby/SWIG Lucene.
|
||||
</p>
|
||||
|
@ -82,7 +84,7 @@
|
|||
</ul>
|
||||
</section>
|
||||
|
||||
<section name="General Lucene List">
|
||||
<section id="General Lucene List"><title>General Lucene List</title>
|
||||
<p>
|
||||
General discussion concerning all Lucene subprojects.
|
||||
</p>
|
|
@ -1,13 +1,16 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Query Parser Syntax
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="carlson@apache.org">Peter Carlson</author>
|
||||
<title>
|
||||
Query Parser Syntax - Apache Lucene
|
||||
</title>
|
||||
</properties>
|
||||
<body>
|
||||
<section name="Overview">
|
||||
<section id="Overview">
|
||||
<title>Overview</title>
|
||||
<p>Although Lucene provides the ability to create your own
|
||||
queries through its API, it also provides a rich query
|
||||
language through the Query Parser, a lexer which
|
||||
|
@ -46,7 +49,8 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Terms">
|
||||
<section id="Terms">
|
||||
<title>Terms</title>
|
||||
<p>A query is broken up into terms and operators. There are two types of terms: Single Terms and Phrases.</p>
|
||||
<p>A Single Term is a single word such as "test" or "hello".</p>
|
||||
<p>A Phrase is a group of words surrounded by double quotes such as "hello dolly".</p>
|
||||
|
@ -55,7 +59,8 @@
|
|||
So it is important to choose an analyzer that will not interfere with the terms used in the query string.</p>
|
||||
</section>
|
||||
|
||||
<section name="Fields">
|
||||
<section id="Fields">
|
||||
<title>Fields</title>
|
||||
<p>Lucene supports fielded data. When performing a search you can either specify a field, or use the default field. The field names and default field are implementation specific.</p>
|
||||
<p>You can search any field by typing the field name followed by a colon ":" and then the term you are looking for. </p>
|
||||
<p>As an example, let's assume a Lucene index contains two fields, title and text and text is the default field.
|
||||
|
@ -71,11 +76,12 @@
|
|||
<p>Will only find "Do" in the title field. It will find "it" and "right" in the default field (in this case the text field). </p>
|
||||
</section>
|
||||
|
||||
<section name="Term Modifiers">
|
||||
|
||||
<section id="Term Modifiers">
|
||||
<title>Term Modifiers</title>
|
||||
<p>Lucene supports modifying query terms to provide a wide range of searching options.</p>
|
||||
|
||||
<subsection name="Wildcard Searches">
|
||||
<section id="Wildcard Searches">
|
||||
<title>Wildcard Searches</title>
|
||||
<p>Lucene supports single and multiple character wildcard searches.</p>
|
||||
<p>To perform a single character wildcard search use the "?" symbol.</p>
|
||||
<p>To perform a multiple character wildcard search use the "*" symbol.</p>
|
||||
|
@ -88,10 +94,11 @@
|
|||
<p>You can also use the wildcard searches in the middle of a term.</p>
|
||||
<source>te*t</source>
|
||||
<p>Note: You cannot use a * or ? symbol as the first character of a search.</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
|
||||
<subsection name="Fuzzy Searches">
|
||||
<section id="Fuzzy Searches">
|
||||
<title>Fuzzy Searches</title>
|
||||
<p>Lucene supports fuzzy searches based on the Levenshtein Distance, or Edit Distance algorithm. To do a fuzzy search use the tilde, "~", symbol at the end of a Single word Term. For example to search for a term similar in spelling to "roam" use the fuzzy search: </p>
|
||||
|
||||
<source>roam~</source>
|
||||
|
@ -100,17 +107,19 @@
|
|||
<p>Starting with Lucene 1.9 an additional (optional) parameter can specify the required similarity. The value is between 0 and 1, with a value closer to 1 only terms with a higher similarity will be matched. For example:</p>
|
||||
<source>roam~0.8</source>
|
||||
<p>The default that is used if the parameter is not given is 0.5.</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
|
||||
<subsection name="Proximity Searches">
|
||||
<section id="Proximity Searches">
|
||||
<title>Proximity Searches</title>
|
||||
<p>Lucene supports finding words that are within a specific distance of each other. To do a proximity search use the tilde, "~", symbol at the end of a Phrase. For example to search for "apache" and "jakarta" within 10 words of each other in a document use the search: </p>
|
||||
|
||||
<source>"jakarta apache"~10</source>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
|
||||
<subsection name="Range Searches">
|
||||
<section id="Range Searches">
|
||||
<title>Range Searches</title>
|
||||
<p>Range Queries allow one to match documents whose field(s) values
|
||||
are between the lower and upper bound specified by the Range Query.
|
||||
Range Queries can be inclusive or exclusive of the upper and lower bounds.
|
||||
|
@ -122,10 +131,11 @@
|
|||
<p>This will find all documents whose titles are between Aida and Carmen, but not including Aida and Carmen.</p>
|
||||
<p>Inclusive range queries are denoted by square brackets. Exclusive range queries are denoted by
|
||||
curly brackets.</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
|
||||
<subsection name="Boosting a Term">
|
||||
<section id="Boosting a Term">
|
||||
<title>Boosting a Term</title>
|
||||
<p>Lucene provides the relevance level of matching documents based on the terms found. To boost a term use the caret, "^", symbol with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be.</p>
|
||||
<p>Boosting allows you to control the relevance of a document by boosting its term. For example, if you are searching for</p>
|
||||
|
||||
|
@ -137,17 +147,17 @@
|
|||
|
||||
<source>"jakarta apache"^4 "Apache Lucene"</source>
|
||||
<p>By default, the boost factor is 1. Although the boost factor must be positive, it can be less than 1 (e.g. 0.2)</p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
|
||||
<section name="Boolean operators">
|
||||
|
||||
<section id="Boolean operators">
|
||||
<title>Boolean Operators</title>
|
||||
<p>Boolean operators allow terms to be combined through logic operators.
|
||||
Lucene supports AND, "+", OR, NOT and "-" as Boolean operators (Note: Boolean operators must be ALL CAPS).</p>
|
||||
|
||||
<subsection name="OR">
|
||||
<section id="OR">
<title>OR</title>
|
||||
<p>The OR operator is the default conjunction operator. This means that if there is no Boolean operator between two terms, the OR operator is used.
|
||||
The OR operator links two terms and finds a matching document if either of the terms exist in a document. This is equivalent to a union using sets.
|
||||
The symbol || can be used in place of the word OR.</p>
|
||||
|
@ -159,23 +169,26 @@
|
|||
|
||||
<source>"jakarta apache" OR jakarta</source>
|
||||
|
||||
</subsection>
|
||||
<subsection name="AND">
|
||||
</section>
|
||||
<section id="AND">
|
||||
<title>AND</title>
|
||||
<p>The AND operator matches documents where both terms exist anywhere in the text of a single document.
|
||||
This is equivalent to an intersection using sets. The symbol && can be used in place of the word AND.</p>
|
||||
<p>To search for documents that contain "jakarta apache" and "Apache Lucene" use the query: </p>
|
||||
|
||||
<source>"jakarta apache" AND "Apache Lucene"</source>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="+">
|
||||
<section id="+">
|
||||
<title>+</title>
|
||||
<p>The "+" or required operator requires that the term after the "+" symbol exist somewhere in a field of a single document.</p>
|
||||
<p>To search for documents that must contain "jakarta" and may contain "lucene" use the query:</p>
|
||||
|
||||
<source>+jakarta lucene</source>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="NOT">
|
||||
<section id="NOT">
|
||||
<title>NOT</title>
|
||||
<p>The NOT operator excludes documents that contain the term after NOT.
|
||||
This is equivalent to a difference using sets. The symbol ! can be used in place of the word NOT.</p>
|
||||
<p>To search for documents that contain "jakarta apache" but not "Apache Lucene" use the query: </p>
|
||||
|
@ -184,31 +197,35 @@
|
|||
<p>Note: The NOT operator cannot be used with just one term. For example, the following search will return no results:</p>
|
||||
|
||||
<source>NOT "jakarta apache"</source>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
<subsection name="-">
|
||||
<section id="-">
|
||||
<title>-</title>
|
||||
<p>The "-" or prohibit operator excludes documents that contain the term after the "-" symbol.</p>
|
||||
<p>To search for documents that contain "jakarta apache" but not "Apache Lucene" use the query: </p>
|
||||
|
||||
<source>"jakarta apache" -"Apache Lucene"</source>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Grouping">
|
||||
<section id="Grouping">
|
||||
<title>Grouping</title>
|
||||
<p>Lucene supports using parentheses to group clauses to form sub queries. This can be very useful if you want to control the boolean logic for a query.</p>
|
||||
<p>To search for either "jakarta" or "apache" and "website" use the query:</p>
|
||||
<source>(jakarta OR apache) AND website</source>
|
||||
<p>This eliminates any confusion and makes sure that website must exist and either term jakarta or apache may exist.</p>
|
||||
</section>
|
||||
|
||||
<section name="Field Grouping">
|
||||
<section id="Field Grouping">
|
||||
<title>Field Grouping</title>
|
||||
<p>Lucene supports using parentheses to group multiple clauses to a single field.</p>
|
||||
<p>To search for a title that contains both the word "return" and the phrase "pink panther" use the query:</p>
|
||||
<source>title:(+return +"pink panther")</source>
|
||||
</section>
|
||||
|
||||
<section name="Escaping Special Characters">
|
||||
<section id="Escaping Special Characters">
|
||||
<title>Escaping Special Characters</title>
|
||||
<p>Lucene supports escaping special characters that are part of the query syntax. The current list of special characters is</p>
|
||||
<p>+ - && || ! ( ) { } [ ] ^ " ~ * ? : \</p>
|
||||
<p>To escape these characters use the \ before the character. For example to search for (1+1):2 use the query:</p>
|
|
@ -0,0 +1,35 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header><title>Apache Lucene - Downloads and Releases</title></header>
|
||||
<properties>
|
||||
<author email="gsingers@apache.org">Grant Ingersoll</author>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section id="Downloads"><title>Downloads and Releases</title>
|
||||
<p>Information on Lucene Java Downloads and Releases.</p>
|
||||
<section id="Official"><title>Official Release</title>
|
||||
<p>Official releases are usually created when the <a href="whoweare.html">developers</a> feel there are
|
||||
sufficient changes, improvements and bug fixes to warrant a release.
|
||||
Due to the voluntary nature of Lucene, no releases are scheduled in advance.</p>
|
||||
<p>Both binary and source releases are available for
|
||||
<a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">download from the Apache Mirrors</a></p>
|
||||
</section>
|
||||
<section id="Nightly"><title>Nightly Build Download</title>
|
||||
<p>Nightly builds are based on the trunk version of the code checked into
|
||||
<a href="https://svn.apache.org/repos/asf/lucene/java/trunk">SVN</a></p>
|
||||
<a href="http://people.apache.org/dist/lucene/java/nightly/">Download</a>
|
||||
</section>
|
||||
<section id="source"><title>Source Code</title>
|
||||
<p>Subversion
|
||||
The source files are now stored using Subversion (see http://subversion.tigris.org/ and http://svnbook.red-bean.com/)
|
||||
</p><p>
|
||||
<code>svn checkout http://svn.apache.org/repos/asf/lucene/java/trunk lucene/java/trunk</code>
|
||||
</p>
|
||||
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</body>
|
||||
</document>
|
|
@ -1,12 +1,17 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Resources
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="cutting@apache.org">Doug Cutting</author>
|
||||
<title>Resources - Apache Lucene</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="Page moved">
|
||||
<section id="Page moved"><title>Page moved</title>
|
||||
|
||||
<a href="http://wiki.apache.org/jakarta-lucene/Resources">This page is now part of the Wiki</a>
|
||||
|
|
@ -1,14 +1,18 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Scoring
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="gsingers at apache.org">Grant Ingersoll</author>
|
||||
<title>Scoring - Apache Lucene</title>
|
||||
</properties>
|
||||
|
||||
<body>
|
||||
|
||||
<section name="Introduction">
|
||||
<section id="Introduction"><title>Introduction</title>
|
||||
<p>Lucene scoring is the heart of why we all love Lucene. It is blazingly fast and it hides almost all of the complexity from the user.
|
||||
In a nutshell, it works. At least, that is, until it doesn't work, or doesn't work as one would expect it to
|
||||
work. Then we are left digging into Lucene internals or asking for help on java-user@lucene.apache.org to figure out why a document with five of our query terms
|
||||
|
@ -37,7 +41,7 @@
|
|||
will finish up with some reference material in the <a href="#Appendix">Appendix</a>.
|
||||
</p>
|
||||
</section>
|
||||
<section name="Scoring">
|
||||
<section id="Scoring"><title>Scoring</title>
|
||||
<p>Scoring is very much dependent on the way documents are indexed,
|
||||
so it is important to understand indexing (see
|
||||
<a href="gettingstarted.html">Apache Lucene - Getting Started Guide</a>
|
||||
|
@ -47,7 +51,7 @@
|
|||
<a href="api/org/apache/lucene/search/Searcher.html#explain(Query query, int doc)">Searcher.explain(Query query, int doc)</a> functionality,
|
||||
which can go a long way in informing why a score is returned.
|
||||
</p>
|
||||
<subsection name="Fields and Documents">
|
||||
<section id="Fields and Documents"><title>Fields and Documents</title>
|
||||
<p>In Lucene, the objects we are scoring are
|
||||
<a href="api/org/apache/lucene/document/Document.html">Documents</a>. A Document is a collection
|
||||
of
|
||||
|
@ -60,8 +64,8 @@
|
|||
<a href="api/org/apache/lucene/search/DefaultSimilarity.html">DefaultSimilarity</a>
|
||||
on the Fields).
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Score Boosting">
|
||||
</section>
|
||||
<section id="Score Boosting"><title>Score Boosting</title>
|
||||
<p>Lucene allows influencing search results by "boosting" at more than one level:
|
||||
<ul>
|
||||
<li><b>Document level boosting</b>
|
||||
|
@ -107,8 +111,8 @@
|
|||
as <b>norm(t, d)</b>, as shown by the formula in
|
||||
<a href="api/org/apache/lucene/search/Similarity.html">Similarity</a>.
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Understanding the Scoring Formula">
|
||||
</section>
|
||||
<section id="Understanding the Scoring Formula"><title>Understanding the Scoring Formula</title>
|
||||
|
||||
<p>
|
||||
This scoring formula is described in the
|
||||
|
@ -116,8 +120,8 @@
|
|||
basics of Lucene scoring work, especially the
|
||||
<a href="api/org/apache/lucene/search/TermQuery.html">TermQuery</a>.
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="The Big Picture">
|
||||
</section>
|
||||
<section id="The Big Picture"><title>The Big Picture</title>
|
||||
<p>OK, so the tf-idf formula and the
|
||||
<a href="api/org/apache/lucene/search/Similarity.html">Similarity</a>
|
||||
is great for understanding the basics of Lucene scoring, but what really drives Lucene scoring are
|
||||
|
@ -162,38 +166,38 @@
|
|||
provided by each scorer while factoring in the coord() score.
|
||||
<!-- Do we want to fill in the details of the counting sum scorer, disjunction scorer, etc.? -->
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Query Classes">
|
||||
</section>
|
||||
<section id="Query Classes"><title>Query Classes</title>
|
||||
<p>For information on the Query Classes, refer to the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html#query">search package javadocs</a>
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Changing Similarity">
|
||||
</section>
|
||||
<section id="Changing Similarity"><title>Changing Similarity</title>
|
||||
<p>One of the ways of changing the scoring characteristics of Lucene is to change the similarity factors. For information on
|
||||
how to do this, see the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html#changingSimilarity">search package javadocs</a></p>
|
||||
</subsection>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
<section name="Changing your Scoring -- Expert Level">
|
||||
<section id="Changing your Scoring -- Expert Level"><title>Changing your Scoring -- Expert Level</title>
|
||||
<p>At a much deeper level, one can affect scoring by implementing their own Query classes (and related scoring classes.) To learn more
|
||||
about how to do this, refer to the
|
||||
<a href="api/org/apache/lucene/search/package-summary.html#scoring">search package javadocs</a>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Appendix">
|
||||
<subsection name="Class Diagrams">
|
||||
<section id="Appendix"><title>Appendix</title>
|
||||
<section id="Class Diagrams"><title>Class Diagrams</title>
|
||||
<p>
|
||||
<a href="http://wiki.apache.org/jakarta-lucene/KarlWettin?action=AttachFile&do=view&target=search_uml_1.jpg">
|
||||
Karl Wettin's UML on the Wiki</a>
|
||||
</p>
|
||||
</subsection>
|
||||
<subsection name="Sequence Diagrams">
|
||||
<p class="highlight-for-editing">FILL IN HERE. Volunteers?</p>
|
||||
</subsection>
|
||||
<subsection name="Algorithm" class="highlight-for-editing">
|
||||
<p>GSI Note: This section is mostly my notes on stepping through the Scoring process and serves as
|
||||
</section>
|
||||
<section id="Sequence Diagrams"><title>Sequence Diagrams</title>
|
||||
<p >FILL IN HERE. Volunteers?</p>
|
||||
</section>
|
||||
<section id="Algorithm"><title>Algorithm</title>
|
||||
<p>This section is mostly notes on stepping through the Scoring process and serves as
|
||||
fertilizer for the earlier sections.</p>
|
||||
<p>In the typical search application, a
|
||||
<a href="api/org/apache/lucene/search/Query.html">Query</a>
|
||||
|
@ -281,7 +285,7 @@
|
|||
implementations. <!-- DOUBLE CHECK THIS -->If you have a simple OR query
|
||||
your internal Scorer is most likely a DisjunctionSumScorer, which essentially combines the scores
|
||||
from the sub scorers of the OR'd terms.</p>
|
||||
</subsection>
|
||||
</section>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
|
@ -0,0 +1,112 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Copyright 2002-2004 The Apache Software Foundation or its licensors,
|
||||
as applicable.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Forrest site.xml
|
||||
|
||||
This file contains an outline of the site's information content. It is used to:
|
||||
- Generate the website menus (though these can be overridden - see docs)
|
||||
- Provide semantic, location-independent aliases for internal 'site:' URIs, eg
|
||||
<link href="site:changes"> links to changes.html (or ../changes.html if in
|
||||
subdir).
|
||||
- Provide aliases for external URLs in the external-refs section. Eg, <link
|
||||
href="ext:cocoon"> links to http://cocoon.apache.org/
|
||||
|
||||
See http://forrest.apache.org/docs/linking.html for more info
|
||||
-->
|
||||
|
||||
<site label="Lucene" href="" xmlns="http://apache.org/forrest/linkmap/1.0" tab="">
|
||||
<!-- Note: No matter what you configure here, Forrest will always try to load
|
||||
index.html when you request http://yourHost/.
|
||||
'How can I use a start-up-page other than index.html?' in the FAQs has more
|
||||
information on how to change that.
|
||||
-->
|
||||
<about label="About">
|
||||
<overview label="Overview" href="index.html" description="Welcome to Java Lucene"/>
|
||||
<features label="Features" href="features.html"/>
|
||||
<powered-by label="Powered by Lucene" href="ext:powered-by"/>
|
||||
<who-we-are label="Who We Are" href="whoweare.html"/>
|
||||
</about>
|
||||
<!-- keep submenu items in alpha order -->
|
||||
<docs label="Documentation">
|
||||
|
||||
<apidocs label="API Docs" href="api/"/>
|
||||
<benchmarks label="Benchmarks" href="benchmarks.html"/>
|
||||
<contributions label="Contributions" href="contributions.html"/>
|
||||
<faq label="FAQ" href="ext:faq" />
|
||||
<file-formats label="File Formats" href="fileformats.html"/>
|
||||
<tutorial label="Getting Started" href="gettingstarted.html"/>
|
||||
<lucene-sandbox label="Lucene Sandbox" href="lucene-sandbox/index.html"/>
|
||||
<query-syntax label="Query Syntax" href="queryparsersyntax.html"/>
|
||||
<scoring label="Scoring" href="scoring.html"/>
|
||||
<wiki label="Wiki" href="ext:wiki" />
|
||||
</docs>
|
||||
|
||||
<resources label="Resources">
|
||||
<issues label="Issue Tracking" href="ext:issues"/>
|
||||
<contact label="Mailing Lists" href="mailinglists.html"/>
|
||||
<release label="Downloads" href="releases.html"/>
|
||||
<svn label="Version Control" href="ext:source" />
|
||||
</resources>
|
||||
<versions label="Site Versions">
|
||||
<official label="Official" href="./"/>
|
||||
<!-- Needs to be filled in -->
|
||||
<!-- <nightly label="Nightly" href=""/> -->
|
||||
|
||||
</versions>
|
||||
<projects label="Related Projects">
|
||||
<lucene label="Lucene (Top-Level)" href="ext:topLevel"/>
|
||||
<lucene label="Hadoop" href="ext:hadoop"/>
|
||||
<lucene label="Lucy" href="ext:lucy"/>
|
||||
<lucene label="Lucene.NET" href="ext:lucene-net"/>
|
||||
<lucene label="Nutch" href="ext:nutch" />
|
||||
<lucene label="SOLR" href="ext:solr"/>
|
||||
</projects>
|
||||
|
||||
<!--
|
||||
The href must be wholesite.html/pdf. You can change the labels and node names.
|
||||
<all label="All">
|
||||
<whole_site_html label="Whole Site HTML" href="wholesite.html"/>
|
||||
<whole_site_pdf label="Whole Site PDF" href="wholesite.pdf"/>
|
||||
</all>
|
||||
-->
|
||||
|
||||
<external-refs>
|
||||
<forrest href="http://forrest.apache.org/">
|
||||
<linking href="docs/linking.html"/>
|
||||
<validation href="docs/validation.html"/>
|
||||
<webapp href="docs/your-project.html#webapp"/>
|
||||
<dtd-docs href="docs/dtd-docs.html"/>
|
||||
</forrest>
|
||||
<cocoon href="http://cocoon.apache.org/"/>
|
||||
<xml.apache.org href="http://xml.apache.org/"/>
|
||||
<issues href="http://issues.apache.org/jira/browse/LUCENE"/>
|
||||
<topLevel href="http://lucene.apache.org"/>
|
||||
<solr href="http://incubator.apache.org/solr/" />
|
||||
<nutch href="http://lucene.apache.org/nutch/" />
|
||||
<lucy href="http://lucene.apache.org/lucy/"/>
|
||||
<lucene-net href="http://incubator.apache.org/projects/lucene.net.html"/>
|
||||
<hadoop href="http://lucene.apache.org/hadoop/"/>
|
||||
<wiki href="http://wiki.apache.org/jakarta-lucene" />
|
||||
<faq href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ" />
|
||||
<releases href="http://www.apache.org/dyn/closer.cgi/lucene/java/" />
|
||||
<source href="http://svn.apache.org/viewcvs.cgi/lucene/java/"/>
|
||||
<powered-by href="http://wiki.apache.org/jakarta-lucene/PoweredBy"/>
|
||||
</external-refs>
|
||||
|
||||
</site>
|
|
@ -1,12 +1,16 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - System Properties
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="otis @ apache dot org">Otis Gospodnetić</author>
|
||||
<title>Apache Lucene - System Properties</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="About this Document">
|
||||
<section id="About this Document"><title>About this Document</title>
|
||||
<p>
|
||||
Lucene has a number of properties that can be tuned. They can be adjusted either
|
||||
programmatically, using the Lucene API, or their default values can be set via
|
||||
|
@ -16,7 +20,7 @@
|
|||
</p>
|
||||
</section>
|
||||
|
||||
<section name="System Properties">
|
||||
<section id="System Properties"><title>System Properties</title>
|
||||
<p>
|
||||
<table width="100%" border="0" cellpadding="4" cellspacing="0">
|
||||
<tr valign="top">
|
|
@ -0,0 +1,59 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Copyright 2002-2004 The Apache Software Foundation or its licensors,
|
||||
as applicable.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!DOCTYPE tabs PUBLIC "-//APACHE//DTD Cocoon Documentation Tab V1.1//EN" "http://forrest.apache.org/dtd/tab-cocoon-v11.dtd">
|
||||
|
||||
<tabs software="Java"
|
||||
title="Java"
|
||||
copyright="The Apache Software Foundation"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
|
||||
<!-- The rules for tabs are:
|
||||
@dir will always have '/@indexfile' added.
|
||||
@indexfile gets appended to @dir if the tab is selected. Defaults to 'index.html'
|
||||
@href is not modified unless it is root-relative and obviously specifies a
|
||||
directory (ends in '/'), in which case /index.html will be added
|
||||
If @id's are present, site.xml entries with a matching @tab will be in that tab.
|
||||
|
||||
Tabs can be embedded to a depth of two. The second level of tabs will only
|
||||
be displayed when their parent tab is selected.
|
||||
-->
|
||||
|
||||
<!--
|
||||
<tab id="" label="Home" dir="" indexfile="index.html"/>
|
||||
-->
|
||||
|
||||
<tab id="" label="Main" dir=""/>
|
||||
<tab id="wiki" label="Wiki" href="http://wiki.apache.org/jakarta-lucene"/>
|
||||
|
||||
<!--
|
||||
<tab id="samples" label="Samples" dir="samples" indexfile="sample.html">
|
||||
<tab id="samples-index" label="Index" dir="samples" indexfile="index.html"/>
|
||||
<tab id="samples-sample2" label="Sample2" dir="samples" indexfile="static.html"/>
|
||||
</tab>
|
||||
<tab label="Apache XML Projects" href="http://xml.apache.org">
|
||||
<tab label="Forrest" href="http://forrest.apache.org"/>
|
||||
<tab label="Xerces" href="http://xml.apache.org/xerces"/>
|
||||
</tab>
|
||||
<tab id="plugins" label="Plugins" dir="pluginDocs/plugins_0_70" indexfile="index.html"/>
|
||||
-->
|
||||
<!-- Add new tabs here, eg:
|
||||
<tab label="How-Tos" dir="community/howto/"/>
|
||||
<tab label="XML Site" dir="xml-site/"/>
|
||||
-->
|
||||
|
||||
</tabs>
|
|
@ -1,17 +1,21 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<header>
|
||||
<title>
|
||||
Apache Lucene - Who We Are
|
||||
</title>
|
||||
</header>
|
||||
<properties>
|
||||
<author email="husted@apache.org">Ted Husted</author>
|
||||
<author email="cutting@apache.org">Doug Cutting</author>
|
||||
<title>Who We Are - Apache Lucene</title>
|
||||
</properties>
|
||||
<body>
|
||||
|
||||
<section name="Who We Are">
|
||||
<section id="Who We Are"><title>Who We Are</title>
|
||||
<p>Lucene is maintained by a team of volunteer developers.</p>
|
||||
</section>
|
||||
|
||||
<section name="Committers">
|
||||
<section id="Committers"><title>Committers</title>
|
||||
<ul>
|
||||
<li><b><a href="http://www.nutch.org/blog/cutting.html">Doug Cutting</a></b> (cutting@...)
|
||||
|
||||
|
@ -43,14 +47,13 @@ patents</a>.</p>
|
|||
<li><b>Yonik Seeley</b> (yonik@...)</li>
|
||||
<li><b>Grant Ingersoll</b> (gsingers@...) </li>
|
||||
<li><b>Mike McCandless</b> (mikemccand@...) </li>
|
||||
<li><b>Mark Harwood</b> (mharwood@...) </li>
|
||||
</ul>
|
||||
|
||||
<p>Note that the email addresses above end with @apache.org.</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section name="Other Contributors">
|
||||
<section id="Other Contributors"><title>Other Contributors</title>
|
||||
<ul>
|
||||
<li>Josh Bloch</li>
|
||||
<li>Ted Husted</li>
|
|
@ -0,0 +1,72 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Copyright 2002-2005 The Apache Software Foundation or its licensors,
|
||||
as applicable.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<map:sitemap xmlns:map="http://apache.org/cocoon/sitemap/1.0">
|
||||
|
||||
<map:components>
|
||||
<map:actions>
|
||||
<map:action logger="sitemap.action.sourcetype" name="sourcetype" src="org.apache.forrest.sourcetype.SourceTypeAction">
|
||||
<sourcetype name="hello-v1.0">
|
||||
<document-declaration public-id="-//Acme//DTD Hello Document V1.0//EN" />
|
||||
</sourcetype>
|
||||
</map:action>
|
||||
</map:actions>
|
||||
|
||||
<map:selectors default="parameter">
|
||||
<map:selector logger="sitemap.selector.parameter" name="parameter" src="org.apache.cocoon.selection.ParameterSelector" />
|
||||
</map:selectors>
|
||||
</map:components>
|
||||
|
||||
<map:resources>
|
||||
<map:resource name="transform-to-document">
|
||||
<map:act type="sourcetype" src="{src}">
|
||||
<map:select type="parameter">
|
||||
<map:parameter name="parameter-selector-test" value="{sourcetype}" />
|
||||
|
||||
<map:when test="hello-v1.0">
|
||||
<map:generate src="{project:content.xdocs}{../../1}.xml" />
|
||||
<map:transform src="{project:resources.stylesheets}/hello2document.xsl" />
|
||||
<map:serialize type="xml-document"/>
|
||||
</map:when>
|
||||
</map:select>
|
||||
</map:act>
|
||||
</map:resource>
|
||||
</map:resources>
|
||||
|
||||
<map:pipelines>
|
||||
<map:pipeline>
|
||||
<map:match pattern="old_site/*.html">
|
||||
<map:select type="exists">
|
||||
<map:when test="{project:content}{1}.html">
|
||||
<map:read src="{project:content}{1}.html" mime-type="text/html"/>
|
||||
<!--
|
||||
Use this instead if you want JTidy to clean up your HTML
|
||||
<map:generate type="html" src="{project:content}/{0}" />
|
||||
<map:serialize type="html"/>
|
||||
-->
|
||||
</map:when>
|
||||
</map:select>
|
||||
</map:match>
|
||||
|
||||
<map:match pattern="**.xml">
|
||||
<map:call resource="transform-to-document">
|
||||
<map:parameter name="src" value="{project:content.xdocs}{1}.xml" />
|
||||
</map:call>
|
||||
</map:match>
|
||||
</map:pipeline>
|
||||
</map:pipelines>
|
||||
</map:sitemap>
|
|
@ -0,0 +1,453 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Copyright 2002-2005 The Apache Software Foundation or its licensors,
|
||||
as applicable.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Skin configuration file. This file contains details of your project,
|
||||
which will be used to configure the chosen Forrest skin.
|
||||
-->
|
||||
|
||||
<!DOCTYPE skinconfig PUBLIC "-//APACHE//DTD Skin Configuration V0.7-1//EN" "http://forrest.apache.org/dtd/skinconfig-v07-1.dtd">
|
||||
<skinconfig>
|
||||
<!-- To enable lucene search add provider="lucene" (default is google).
|
||||
Add box-location="alt" to move the search box to an alternate location
|
||||
(if the skin supports it) and box-location="all" to show it in all
|
||||
available locations on the page. Remove the <search> element to show
|
||||
no search box. @domain will enable sitesearch for the specific domain with google.
|
||||
In other words google will search the @domain for the query string.
|
||||
-->
|
||||
<search name="Lucene Java" domain="lucene.apache.org" provider="google"/>
|
||||
|
||||
<!-- Disable the print link? If enabled, invalid HTML 4.0.1 -->
|
||||
<disable-print-link>true</disable-print-link>
|
||||
<!-- Disable the PDF link? -->
|
||||
<disable-pdf-link>false</disable-pdf-link>
|
||||
<!-- Disable the POD link? -->
|
||||
<disable-pod-link>true</disable-pod-link>
|
||||
<!-- Disable the Text link? FIXME: NOT YET IMPLEMENTED. -->
|
||||
<disable-txt-link>true</disable-txt-link>
|
||||
<!-- Disable the xml source link? -->
|
||||
<!-- The xml source link makes it possible to access the xml rendition
|
||||
of the source from the html page, and to have it generated statically.
|
||||
This can be used to enable other sites and services to reuse the
|
||||
xml format for their uses. Keep this disabled if you don't want other
|
||||
sites to easily reuse your pages.-->
|
||||
<disable-xml-link>true</disable-xml-link>
|
||||
|
||||
<!-- Disable navigation icons on all external links? -->
|
||||
<disable-external-link-image>true</disable-external-link-image>
|
||||
|
||||
<!-- Disable w3c compliance links?
|
||||
Use e.g. align="center" to move the compliance links logos to
|
||||
an alternate location; the default is left.
|
||||
(if the skin supports it) -->
|
||||
<disable-compliance-links>true</disable-compliance-links>
|
||||
|
||||
<!-- Render mailto: links unrecognisable by spam harvesters? -->
|
||||
<obfuscate-mail-links>false</obfuscate-mail-links>
|
||||
<obfuscate-mail-value> ... </obfuscate-mail-value>
|
||||
|
||||
<!-- Disable the javascript facility to change the font size -->
|
||||
<disable-font-script>true</disable-font-script>
|
||||
|
||||
<!-- mandatory project logo
|
||||
default skin: renders it at the top -->
|
||||
<project-name>Lucene</project-name>
|
||||
<project-description>Apache Lucene is a high-performance, full-featured text search engine library written entirely in
|
||||
Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.</project-description>
|
||||
<project-url>http://lucene.apache.org/java/</project-url>
|
||||
<project-logo>http://lucene.apache.org/images/lucene_green_300.gif</project-logo>
|
||||
<!-- http://lucene.apache.org/images/lucene_green_300.gif -->
|
||||
<!-- Alternative static image:
|
||||
<project-logo>images/project-logo.gif</project-logo> -->
|
||||
|
||||
<!-- optional group logo
|
||||
default skin: renders it at the top-left corner -->
|
||||
<!-- group logo -->
|
||||
<group-name>Lucene</group-name>
|
||||
<group-description>Apache Lucene</group-description>
|
||||
<group-url>http://lucene.apache.org/</group-url>
|
||||
<group-logo>http://lucene.apache.org/java/docs/images/asf-logo.gif</group-logo>
|
||||
|
||||
<!-- Alternative static image:
|
||||
<group-logo>images/group-logo.gif</group-logo> -->
|
||||
|
||||
<!-- optional host logo (e.g. sourceforge logo)
|
||||
default skin: renders it at the bottom-left corner -->
|
||||
<host-url></host-url>
|
||||
<host-logo></host-logo>
|
||||
|
||||
<!-- relative url of a favicon file, normally favicon.ico -->
|
||||
<favicon-url>images/favicon.ico</favicon-url>
|
||||
|
||||
<!-- The following are used to construct a copyright statement -->
|
||||
<year>2006</year>
|
||||
<vendor>The Apache Software Foundation.</vendor>
|
||||
<copyright-link>http://www.apache.org/licenses/</copyright-link>
|
||||
|
||||
<!-- Some skins use this to form a 'breadcrumb trail' of links.
|
||||
Use location="alt" to move the trail to an alternate location
|
||||
(if the skin supports it).
|
||||
Omit the location attribute to display the trail in the default location.
|
||||
Use location="none" to not display the trail (if the skin supports it).
|
||||
For some skins just set the attributes to blank.
|
||||
|
||||
NOTE: If a breadcrumb entry points at a local file the href must
|
||||
be complete, that is it must point to the file itself, not to a
|
||||
directory.
|
||||
-->
|
||||
<trail>
|
||||
<link1 name="Apache" href="http://www.apache.org/"/>
|
||||
<link2 name="Lucene" href="http://lucene.apache.org/"/>
|
||||
<link3 name="Java" href="http://lucene.apache.org/java/"/>
|
||||
</trail>
|
||||
|
||||
<!-- Configure the TOC, i.e. the Table of Contents.
|
||||
@max-depth
|
||||
how many "section" levels need to be included in the
|
||||
generated Table of Contents (TOC).
|
||||
@min-sections
|
||||
Minimum required to create a TOC.
|
||||
@location ("page","menu","page,menu", "none")
|
||||
Where to show the TOC.
|
||||
-->
|
||||
<toc max-depth="2" min-sections="1" location="page"/>
|
||||
|
||||
<!-- Heading types can be clean|underlined|boxed -->
|
||||
<headings type="boxed"/>
|
||||
|
||||
<!-- The optional feedback element will be used to construct a
|
||||
feedback link in the footer with the page pathname appended:
|
||||
<a href="@href">{@to}</a>
|
||||
-->
|
||||
<!--
|
||||
<feedback to="webmaster@foo.com"
|
||||
href="mailto:webmaster@foo.com?subject=Feedback " >
|
||||
Send feedback about the website to:
|
||||
</feedback>
|
||||
-->
|
||||
|
||||
<!-- Optional message of the day (MOTD).
|
||||
Note: This is only implemented in the pelt skin.
|
||||
If the optional <motd> element is used, then messages will be appended
|
||||
depending on the URI string pattern.
|
||||
motd-option : Specifies a pattern to match and provides small text content.
|
||||
motd-title : This text will be added in brackets after the <html><title>
|
||||
motd-page : This text will be added in a panel on the face of the page,
|
||||
with the "motd-page-url" being the hyperlink "More".
|
||||
Values for the "location" attribute are:
|
||||
page : on the face of the page, e.g. in the spare space of the toc
|
||||
alt : at the bottom of the left-hand navigation panel
|
||||
both : both
|
||||
-->
|
||||
<!--
|
||||
<motd>
|
||||
<motd-option pattern="docs_0_80">
|
||||
<motd-title>v0.8-dev</motd-title>
|
||||
<motd-page location="both">
|
||||
This is documentation for development version v0.8
|
||||
</motd-page>
|
||||
<motd-page-url>/versions/index.html</motd-page-url>
|
||||
</motd-option>
|
||||
<motd-option pattern="docs_0_70">
|
||||
<motd-title>v0.7</motd-title>
|
||||
<motd-page location="both">
|
||||
This is documentation for current version v0.7
|
||||
</motd-page>
|
||||
<motd-page-url>/versions/index.html</motd-page-url>
|
||||
</motd-option>
|
||||
</motd>
|
||||
-->
|
||||
|
||||
<!--
|
||||
extra-css - here you can define custom css-elements that are
|
||||
A) overriding the fallback elements or
|
||||
B) adding the css definitions for new elements that you may have
|
||||
used in your documentation.
|
||||
-->
|
||||
<extra-css>
|
||||
<!--Example of reason B:
|
||||
To define the css definition of a new element that you may have used
|
||||
in the class attribute of a <p> node.
|
||||
e.g. <p class="quote"/>
|
||||
-->
|
||||
p.quote {
|
||||
margin-left: 2em;
|
||||
padding: .5em;
|
||||
background-color: #f0f0f0;
|
||||
font-family: monospace;
|
||||
}
|
||||
<!--Example:
|
||||
To override the colours of links only in the footer.
|
||||
-->
|
||||
#footer a { color: #0F3660; }
|
||||
#footer a:visited { color: #009999; }
|
||||
|
||||
pre.code {
|
||||
margin-left: 2em;
|
||||
margin-right: 2em;
|
||||
padding: 0.5em;
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
|
||||
</extra-css>
|
||||
|
||||
<colors>
|
||||
<!-- These values are used for the generated CSS files.
|
||||
They essentially "override" the default colors defined in the chosen skin.
|
||||
There are four duplicate "groups" of colors below, denoted by comments:
|
||||
Color group: Forrest, Krysalis, Collabnet, and Lenya using Pelt.
|
||||
They are provided for example only. To customize the colors of any skin,
|
||||
uncomment one of these groups of color elements and change the values
|
||||
of the particular color elements that you wish to change.
|
||||
Note that by default, all color groups are commented-out which means that
|
||||
the default colors provided by the skin are being used.
|
||||
-->
|
||||
|
||||
<!-- Color group: Forrest: example colors similar to forrest.apache.org
|
||||
Some of the element names are obscure, so comments are added to show how
|
||||
the "pelt" skin uses them, other skins might use these elements in a different way.
|
||||
Tip: temporarily change the value of an element to red (#ff0000) and see the effect.
|
||||
pelt: breadtrail: the strip at the top of the page and the second strip under the tabs
|
||||
pelt: header: top strip containing project and group logos
|
||||
pelt: heading|subheading: section headings within the content
|
||||
pelt: navstrip: the strip under the tabs which contains the published date
|
||||
pelt: menu: the left-hand navigation panel
|
||||
pelt: toolbox: the selected menu item
|
||||
pelt: searchbox: the background of the searchbox
|
||||
pelt: border: line border around selected menu item
|
||||
pelt: body: any remaining parts, e.g. the bottom of the page
|
||||
pelt: footer: the second from bottom strip containing credit logos and published date
|
||||
pelt: feedback: the optional bottom strip containing feedback link
|
||||
-->
|
||||
<!--
|
||||
<color name="breadtrail" value="#cedfef" font="#0F3660" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
|
||||
<color name="header" value="#294563"/>
|
||||
<color name="tab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
|
||||
<color name="tab-unselected" value="#b5c7e7" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
|
||||
<color name="subtab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
|
||||
<color name="subtab-unselected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
|
||||
<color name="heading" value="#294563"/>
|
||||
<color name="subheading" value="#4a6d8c"/>
|
||||
<color name="published" value="#4C6C8F" font="#FFFFFF"/>
|
||||
<color name="feedback" value="#4C6C8F" font="#FFFFFF" align="center"/>
|
||||
<color name="navstrip" value="#4a6d8c" font="#ffffff" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
|
||||
<color name="menu" value="#4a6d8c" font="#cedfef" link="#ffffff" vlink="#ffffff" hlink="#ffcf00"/>
|
||||
<color name="toolbox" value="#4a6d8c"/>
|
||||
<color name="border" value="#294563"/>
|
||||
<color name="dialog" value="#4a6d8c"/>
|
||||
<color name="searchbox" value="#4a6d8c" font="#000000"/>
|
||||
<color name="body" value="#ffffff" link="#0F3660" vlink="#009999" hlink="#000066"/>
|
||||
<color name="table" value="#7099C5"/>
|
||||
<color name="table-cell" value="#f0f0ff"/>
|
||||
<color name="highlight" value="#ffff00"/>
|
||||
<color name="fixme" value="#cc6600"/>
|
||||
<color name="note" value="#006699"/>
|
||||
<color name="warning" value="#990000"/>
|
||||
<color name="code" value="#CFDCED"/>
|
||||
<color name="footer" value="#cedfef"/>
|
||||
-->
|
||||
|
||||
<!-- Color group: Krysalis -->
|
||||
<!--
|
||||
<color name="header" value="#FFFFFF"/>
|
||||
|
||||
<color name="tab-selected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="tab-unselected" value="#F7F7F7" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="subtab-selected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="subtab-unselected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
|
||||
<color name="heading" value="#a5b6c6"/>
|
||||
<color name="subheading" value="#CFDCED"/>
|
||||
|
||||
<color name="navstrip" value="#CFDCED" font="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="toolbox" value="#a5b6c6"/>
|
||||
<color name="border" value="#a5b6c6"/>
|
||||
|
||||
<color name="menu" value="#F7F7F7" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="dialog" value="#F7F7F7"/>
|
||||
|
||||
<color name="body" value="#ffffff" link="#0F3660" vlink="#009999" hlink="#000066"/>
|
||||
|
||||
<color name="table" value="#a5b6c6"/>
|
||||
<color name="table-cell" value="#ffffff"/>
|
||||
<color name="highlight" value="#ffff00"/>
|
||||
<color name="fixme" value="#cc6600"/>
|
||||
<color name="note" value="#006699"/>
|
||||
<color name="warning" value="#990000"/>
|
||||
<color name="code" value="#a5b6c6"/>
|
||||
|
||||
<color name="footer" value="#a5b6c6"/>
|
||||
-->
|
||||
|
||||
<!-- Color group: Collabnet -->
|
||||
<!--
|
||||
<color name="header" value="#003366"/>
|
||||
|
||||
<color name="tab-selected" value="#dddddd" link="#555555" vlink="#555555" hlink="#555555"/>
|
||||
<color name="tab-unselected" value="#999999" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
|
||||
<color name="subtab-selected" value="#cccccc" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="subtab-unselected" value="#cccccc" link="#555555" vlink="#555555" hlink="#555555"/>
|
||||
|
||||
<color name="heading" value="#003366"/>
|
||||
<color name="subheading" value="#888888"/>
|
||||
|
||||
<color name="navstrip" value="#dddddd" font="#555555"/>
|
||||
<color name="toolbox" value="#dddddd" font="#555555"/>
|
||||
<color name="border" value="#999999"/>
|
||||
|
||||
<color name="menu" value="#ffffff"/>
|
||||
<color name="dialog" value="#eeeeee"/>
|
||||
|
||||
<color name="body" value="#ffffff"/>
|
||||
|
||||
<color name="table" value="#ccc"/>
|
||||
<color name="table-cell" value="#ffffff"/>
|
||||
<color name="highlight" value="#ffff00"/>
|
||||
<color name="fixme" value="#cc6600"/>
|
||||
<color name="note" value="#006699"/>
|
||||
<color name="warning" value="#990000"/>
|
||||
<color name="code" value="#003366"/>
|
||||
|
||||
<color name="footer" value="#ffffff"/>
|
||||
-->
|
||||
<!-- Color group: Lenya using pelt-->
|
||||
<!--
|
||||
|
||||
<color name="header" value="#ffffff"/>
|
||||
|
||||
<color name="tab-selected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="tab-unselected" value="#F5F4E9" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="subtab-selected" value="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="subtab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
|
||||
<color name="heading" value="#E5E4D9"/>
|
||||
<color name="subheading" value="#000000"/>
|
||||
<color name="published" value="#000000"/>
|
||||
<color name="navstrip" value="#E5E4D9" font="#000000"/>
|
||||
<color name="toolbox" value="#CFDCED" font="#000000"/>
|
||||
<color name="border" value="#999999"/>
|
||||
|
||||
<color name="menu" value="#E5E4D9" font="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
|
||||
<color name="dialog" value="#CFDCED"/>
|
||||
<color name="body" value="#ffffff" />
|
||||
|
||||
<color name="table" value="#ccc"/>
|
||||
<color name="table-cell" value="#ffffff"/>
|
||||
<color name="highlight" value="#ffff00"/>
|
||||
<color name="fixme" value="#cc6600"/>
|
||||
<color name="note" value="#006699"/>
|
||||
<color name="warning" value="#990000"/>
|
||||
<color name="code" value="#003366"/>
|
||||
|
||||
<color name="footer" value="#E5E4D9"/>
|
||||
-->
|
||||
</colors>
|
||||
|
||||
<!-- Settings specific to PDF output. -->
|
||||
<pdf>
|
||||
<!--
|
||||
Supported page sizes are a0, a1, a2, a3, a4, a5, executive,
|
||||
folio, legal, ledger, letter, quarto, tabloid (default letter).
|
||||
Supported page orientations are portrait, landscape (default
|
||||
portrait).
|
||||
Supported text alignments are left, right, justify (default left).
|
||||
-->
|
||||
<page size="letter" orientation="portrait" text-align="left"/>
|
||||
|
||||
<!--
|
||||
Pattern of the page numbering in the footer - Default is "Page x".
|
||||
first occurrence of '1' digit represents the current page number,
|
||||
second occurrence of '1' digit represents the total number of pages,
|
||||
anything else is considered as the static part of the numbering pattern.
|
||||
Examples : x is the current page number, y the total page number.
|
||||
<page-numbering-format>none</page-numbering-format> Does not display the page numbering
|
||||
<page-numbering-format>1</page-numbering-format> Displays "x"
|
||||
<page-numbering-format>p1.</page-numbering-format> Displays "px."
|
||||
<page-numbering-format>Page 1/1</page-numbering-format> Displays "Page x/y"
|
||||
<page-numbering-format>(1-1)</page-numbering-format> Displays "(x-y)"
|
||||
-->
|
||||
<page-numbering-format>Page 1</page-numbering-format>
|
||||
|
||||
<!--
|
||||
Margins can be specified for top, bottom, inner, and outer
|
||||
edges. If double-sided="false", the inner edge is always left
|
||||
and the outer is always right. If double-sided="true", the
|
||||
inner edge will be left on odd pages, right on even pages,
|
||||
the outer edge vice versa.
|
||||
Specified below are the default settings.
|
||||
-->
|
||||
<margins double-sided="false">
|
||||
<top>1in</top>
|
||||
<bottom>1in</bottom>
|
||||
<inner>1.25in</inner>
|
||||
<outer>1in</outer>
|
||||
</margins>
|
||||
|
||||
<!--
|
||||
Print the URL text next to all links going outside the file
|
||||
-->
|
||||
<show-external-urls>false</show-external-urls>
|
||||
|
||||
<!--
|
||||
Disable the copyright footer on each page of the PDF.
|
||||
A footer is composed for each page. By default, a "credit" with role=pdf
|
||||
will be used, as explained below. Otherwise a copyright statement
|
||||
will be generated. The latter can be disabled.
|
||||
-->
|
||||
<disable-copyright-footer>false</disable-copyright-footer>
|
||||
</pdf>
|
||||
|
||||
<!--
|
||||
Credits are typically rendered as a set of small clickable
|
||||
images in the page footer.
|
||||
|
||||
Use box-location="alt" to move the credits to an alternate location
|
||||
(if the skin supports it).
|
||||
|
||||
For example, pelt skin:
|
||||
- box-location="alt" will place the logo at the end of the
|
||||
left-hand coloured menu panel.
|
||||
- box-location="alt2" will place them underneath that panel
|
||||
in the left-hand whitespace.
|
||||
- Otherwise they are placed next to the compatibility icons
|
||||
at the bottom of the screen.
|
||||
|
||||
Comment out the whole <credit>-element if you want no credits in the
|
||||
web pages
|
||||
-->
|
||||
<credits>
|
||||
<credit box-location="alt">
|
||||
<name>Built with Apache Forrest</name>
|
||||
<url>http://forrest.apache.org/</url>
|
||||
<image>images/built-with-forrest-button.png</image>
|
||||
<width>88</width>
|
||||
<height>31</height>
|
||||
</credit>
|
||||
<!-- A credit with @role="pdf" will be used to compose a footer
|
||||
for each page in the PDF, using either "name" or "url" or both.
|
||||
-->
|
||||
<!--
|
||||
<credit role="pdf">
|
||||
<name>Built with Apache Forrest</name>
|
||||
<url>http://forrest.apache.org/</url>
|
||||
</credit>
|
||||
-->
|
||||
</credits>
|
||||
|
||||
</skinconfig>
|
Before Width: | Height: | Size: 7.1 KiB |
Before Width: | Height: | Size: 43 KiB |
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 4.3 KiB |
Before Width: | Height: | Size: 696 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 515 B |
Before Width: | Height: | Size: 895 B |
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 1.5 KiB |
Before Width: | Height: | Size: 1.8 KiB |
119
xdocs/index.xml
|
@ -1,119 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<document>
|
||||
<properties>
|
||||
<author email="jon at latchkey.com">Jon S. Stevens</author>
|
||||
<author email="husted at apache.org">Ted Husted</author>
|
||||
<author email="cutting at apache.org">Doug Cutting</author>
|
||||
<author email="carlson at apache.org">Peter Carlson</author>
|
||||
<title>Overview - Apache Lucene</title>
|
||||
</properties>
|
||||
<body>
|
||||
<section name="Apache Lucene">
|
||||
<p>
|
||||
Apache Lucene is a high-performance, full-featured text search engine
|
||||
library written entirely in Java. It is a technology suitable for nearly any
|
||||
application that requires full-text search, especially cross-platform.
|
||||
</p>
|
||||
<p>
|
||||
Apache Lucene is an open source project available for
|
||||
<a href="http://www.apache.org/dyn/closer.cgi/lucene/java/">free download</a>.
|
||||
Please use the links on the left to access Lucene.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section name="Lucene News">
|
||||
|
||||
<h3>26 May 2006 - Release 2.0.0 available </h3>
|
||||
|
||||
<p>This is mostly a bugfix release from release 1.9.1.
|
||||
Note however that deprecated 1.x features have now
|
||||
been removed. Any code that compiles against Lucene
|
||||
1.9.1 without deprecation warnings should work without
|
||||
further changes with any 2.x release. For more
|
||||
information about this release, please read <a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_2_0_0/CHANGES.txt">CHANGES.txt</a>.</p>
|
||||
|
||||
<p>Binary and source distributions are
|
||||
available <a
|
||||
href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>.</p>
|
||||
|
||||
<h3>2 March 2006 - Release 1.9.1 available </h3>
|
||||
|
||||
<p>This fixes a serious bug in release 1.9-final. See <a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_9_1/CHANGES.txt">CHANGES.txt</a>
|
||||
for details.</p>
|
||||
|
||||
<p>Binary and source distributions are
|
||||
available <a
|
||||
href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>.</p>
|
||||
<h3>27 February 2006 - 1.9 final available </h3>
|
||||
|
||||
<p>This release has many improvements since release
|
||||
1.4.3, including new features, performance
|
||||
improvements, bug fixes, etc. See <a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_9_final/CHANGES.txt">CHANGES.txt</a>
|
||||
for details.</p>
|
||||
|
||||
<p>1.9 will be the last 1.x release. It is both
|
||||
back-compatible with 1.4.3 and forward-compatible with
|
||||
the upcoming 2.0 release. Many methods and classes in
|
||||
1.4.3 have been deprecated in 1.9 and will be removed
|
||||
in 2.0. Applications must compile against 1.9 without
|
||||
deprecation warnings before they are compatible with
|
||||
2.0.</p>
|
||||
|
||||
<p>Binary and source distributions are
|
||||
available <a
|
||||
href="http://www.apache.org/dyn/closer.cgi/lucene/java/">here</a>.</p>
|
||||
|
||||
<h3>26 January 2006 - Nightly builds available</h3>
|
||||
|
||||
<p>Nightly builds of the current development version of Lucene, to be released as Lucene 1.9,
|
||||
are now available at <a href="http://cvs.apache.org/dist/lucene/java/nightly/">http://cvs.apache.org/dist/lucene/java/nightly/</a>.
|
||||
</p>
|
||||
|
||||
<h3>28 October 2005 - Lucene at ApacheCon</h3>
|
||||
<p><a href="http://www.apachecon.com"><img src="http://apachecon.com/2005/US/logos/Conference135x59.jpg"/></a></p>
|
||||
<p>Monday, December 12, 2005 at 3pm by Grant Ingersoll:<br/>
|
||||
Abstract:<br/>
|
||||
Lucene is a high performance, scalable, cross-platform search engine that contains many advanced features that often go untapped by the majority of users. In this session, designed for those familiar with Lucene, we will examine some of Lucene's more advanced topics and their application, including:</p>
|
||||
<ol>
|
||||
<li>Term Vectors: Manual and Pseudo relevance feedback; Advanced document collection analysis for
|
||||
domain specialization</li>
|
||||
<li>Span Queries: Better phrase matching; Candidate Identification for Question Answering</li>
|
||||
<li>Tying it all Together: Building a search framework for experimentation and rapid deployment</li>
|
||||
<li>Case Studies from <a href="http://www.cnlp.org">CNLP</a>: Crosslingual/multilingual retrieval in Arabic, English and Dutch;
|
||||
Sublanguage specialization for commercial trouble ticket analysis; Passage retrieval and
|
||||
analysis for Question Answering application</li>
|
||||
</ol>
|
||||
<p>Topics 1 through 3 will provide technical details on implementing the advanced Lucene features, while the fourth topic will provide a broader context for understanding when and where to use these features.
|
||||
</p>
|
||||
|
||||
<h3>14 February 2005 - Lucene moves to Apache top-level</h3>
|
||||
|
||||
<p>Lucene has migrated from Apache's Jakarta project to the top-level. Along with this migration,
|
||||
the source code repository has been converted to Subversion. The migration is in progress with
|
||||
some loose ends. Please stay tuned!
|
||||
</p>
|
||||
|
||||
<h3>December 2004 - <em>Lucene in Action</em> is published</h3>
|
||||
|
||||
<a href="http://www.lucenebook.com/"><img border="0" align="left"
|
||||
src="images/lia_3d.jpg"/></a>
|
||||
<p>The first book dedicated solely to Lucene is published. The
|
||||
"search inside the book" feature implemented with Lucene can
|
||||
be seen at <a href="http://www.lucenebook.com/">lucenebook.com</a>.
|
||||
</p>
|
||||
<p style="clear: both;"/>
|
||||
<h3>29 November 2004 - Lucene 1.4.3 Released</h3>
|
||||
|
||||
<p>This fixes a few bugs in 1.4.2. See <a
|
||||
href="http://svn.apache.org/repos/asf/lucene/java/tags/lucene_1_4_3/CHANGES.txt">CHANGES.txt</a>
|
||||
for details. Binary and source distributions are
|
||||
available <a href="http://www.apache.org/dyn/closer.cgi/lucene/">here</a>. After choosing your mirror, navigate to the archive section via the java link.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
|
@ -1,34 +0,0 @@
|
|||
/*
 * Shared style rules for the XDocs pages.
 */

/* Enlarged text. */
.big {
  font-size: 1.5em;
}

/* Block-level formula, set slightly smaller and shifted 25px to the left. */
.formula {
  display: block;
  position: relative;
  left: -25px;
  font-size: 0.9em;
}

/* Intentionally empty hook for the summation element. */
#summation {
}

/* Range annotation of a summation: smaller text, nudged 5px downwards. */
.summation-range {
  font-size: 0.85em;
  position: relative;
  top: 5px;
}

/*
 * Marks passages that proof readers should pay special attention to.
 */
.highlight-for-editing {
  background-color: yellow;
}
|
|
@ -1,36 +0,0 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<project name="Apache Lucene" href="http://lucene.apache.org/">
|
||||
|
||||
<title>Apache Lucene</title>
|
||||
<logo href="/images/lucene_green_300.gif">Apache Lucene</logo>
|
||||
|
||||
<body>
|
||||
<menu name="About">
|
||||
<item name="Overview" href="/index.html"/>
|
||||
<item name="Features" href="/features.html"/>
|
||||
<item name="Powered by Lucene" href="http://wiki.apache.org/jakarta-lucene/PoweredBy"/>
|
||||
<item name="Who We Are" href="/whoweare.html"/>
|
||||
<item name="Mailing Lists" href="/mailinglists.html"/>
|
||||
</menu>
|
||||
|
||||
<menu name="Resources">
|
||||
<item name="Wiki" href="http://wiki.apache.org/jakarta-lucene"/>
|
||||
<item name="FAQ" href="http://wiki.apache.org/jakarta-lucene/LuceneFAQ"/>
|
||||
<item name="Getting Started" href="/gettingstarted.html"/>
|
||||
<item name="Query Syntax" href="/queryparsersyntax.html"/>
|
||||
<item name="File Formats" href="/fileformats.html"/>
|
||||
<item name="Scoring" href="/scoring.html"/>
|
||||
<item name="Javadoc" href="/api/index.html"/>
|
||||
<item name="Contributions" href="/contributions.html"/>
|
||||
<item name="Benchmarks" href="/benchmarks.html"/>
|
||||
<item name="Issue Tracker" href="http://issues.apache.org/jira/browse/LUCENE"/>
|
||||
<item name="Lucene Sandbox" href="/lucene-sandbox/"/>
|
||||
</menu>
|
||||
|
||||
<menu name="Download">
|
||||
<item name="Releases" href="http://www.apache.org/dyn/closer.cgi/lucene/java/"/>
|
||||
<item name="Source Repository" href="http://svn.apache.org/viewcvs.cgi/lucene/java/"/>
|
||||
</menu>
|
||||
|
||||
</body>
|
||||
</project>
|
|
@ -1,316 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
<!-- Content Stylesheet for Site -->
|
||||
|
||||
## Defined variables
## Colour palette shared by the macros in this template. Each value is
## substituted into a bgcolor/color/text/link attribute of the generated HTML.
|
||||
## Page background, default text colour and link colour (used on <body>).
#set ($bodybg = "#ffffff")
|
||||
#set ($bodyfg = "#000000")
|
||||
#set ($bodylink = "#525D76")
|
||||
## Background/foreground of the banner row drawn by the "section" macro.
#set ($bannerbg = "#525D76")
|
||||
#set ($bannerfg = "#ffffff")
|
||||
## Background/foreground of the banner row drawn by the "subsection" macro.
#set ($subbannerbg = "#828DA6")
|
||||
#set ($subbannerfg = "#ffffff")
|
||||
## Cell backgrounds used by the "th" and "td" macros respectively.
#set ($tablethbg = "#039acc")
|
||||
#set ($tabletdbg = "#a0ddf0")
|
||||
|
||||
<!-- start the processing -->
|
||||
## Entry point: invokes the "document" macro defined further down in this file.
#document()
|
||||
<!-- end the processing -->
|
||||
|
||||
## This is where the macros live
|
||||
|
||||
## table: renders a <table> for the given table element.
##   $table - element whose children are iterated via getChildren().
## Only children named "tr" are rendered (through the "tr" macro); any other
## child element is silently skipped.
#macro ( table $table)
|
||||
<table>
|
||||
#foreach ( $items in $table.getChildren() )
|
||||
#if ($items.getName().equals("tr"))
|
||||
#tr ($items)
|
||||
#end
|
||||
#end
|
||||
</table>
|
||||
#end
|
||||
|
||||
## tr: renders one <tr> row for the given row element.
##   $tr - element whose children are iterated via getChildren().
## Children named "td" go to the "td" macro and children named "th" go to the
## "th" macro; any other child element is silently skipped.
#macro ( tr $tr)
|
||||
<tr>
|
||||
#foreach ( $items in $tr.getChildren() )
|
||||
#if ($items.getName().equals("td"))
|
||||
#td ($items)
|
||||
#elseif ($items.getName().equals("th"))
|
||||
#th ($items)
|
||||
#end
|
||||
#end
|
||||
</tr>
|
||||
#end
|
||||
|
||||
## td: renders one <td> data cell for the given cell element.
##   $value - cell element; its "colspan"/"rowspan" attributes are copied to
##            the generated cell and its content becomes the cell body.
#macro ( td $value)
  ## Reset before reading the attributes. Under Velocity's default (shared)
  ## macro context, a value set for an earlier cell would otherwise leak into
  ## every later cell that has no colspan/rowspan of its own.
  #set ($colspan = "")
  #set ($rowspan = "")
  #if ($value.getAttributeValue("colspan"))
    #set ($colspan = $value.getAttributeValue("colspan"))
  #end
  #if ($value.getAttributeValue("rowspan"))
    #set ($rowspan = $value.getAttributeValue("rowspan"))
  #end
  <td bgcolor="$tabletdbg" colspan="$!colspan" rowspan="$!rowspan" valign="top" align="left">
    <font color="#000000" size="-1" face="arial,helvetica,sans-serif">
      #if ($value.getText().length() != 0 || $value.hasChildren())
        $value.getContent()
      #else
        ## Empty cell: emit a non-breaking space so the cell is not rendered collapsed.
        &nbsp;
      #end
    </font>
  </td>
#end
|
||||
|
||||
## th: renders one <th> header cell for the given cell element.
##   $value - cell element; its "colspan"/"rowspan" attributes are copied to
##            the generated cell and its content becomes the cell body.
#macro ( th $value)
  ## Reset before reading the attributes. Under Velocity's default (shared)
  ## macro context, a value set for an earlier cell would otherwise leak into
  ## every later cell that has no colspan/rowspan of its own.
  #set ($colspan = "")
  #set ($rowspan = "")
  #if ($value.getAttributeValue("colspan"))
    #set ($colspan = $value.getAttributeValue("colspan"))
  #end
  #if ($value.getAttributeValue("rowspan"))
    #set ($rowspan = $value.getAttributeValue("rowspan"))
  #end
  <th bgcolor="$tablethbg" colspan="$!colspan" rowspan="$!rowspan" valign="top" align="left">
    <font color="#000000" size="-1" face="arial,helvetica,sans-serif">
      #if ($value.getText().length() != 0 || $value.hasChildren())
        $value.getContent()
      #else
        ## Empty cell: emit a non-breaking space so the cell is not rendered collapsed.
        &nbsp;
      #end
    </font>
  </th>
#end
|
||||
|
||||
## projectanchor: renders a navigation link.
##   $name  - link text.
##   $value - link target. Absolute http:// and https:// URLs are used as-is,
##            targets starting with "/site" are resolved against
##            http://www.apache.org, and everything else is treated as a
##            site-local path and prefixed with $relativePath.
#macro ( projectanchor $name $value )
  ## https:// must be recognised too; otherwise a secure absolute URL would
  ## fall through to the last branch and get $relativePath prepended.
  #if ($value.startsWith("http://") || $value.startsWith("https://"))
    <a href="$value">$name</a>
  #elseif ($value.startsWith("/site"))
    <a href="http://www.apache.org$value">$name</a>
  #else
    <a href="$relativePath$value">$name</a>
  #end
#end
|
||||
|
||||
## metaauthor: emits the author/email <meta> tags for one document author.
##   $author - author name.
##   $email  - author e-mail address.
## The metadata value belongs in the "content" attribute; <meta> has no
## "value" attribute, so consumers would not see the data otherwise.
#macro ( metaauthor $author $email )
  <meta name="author" content="$author">
  <meta name="email" content="$email">
#end
|
||||
|
||||
## image: renders an <img> for the given image element.
##   $value - image element; "src" is prefixed with $relativePath, and the
##            optional "width", "height", "align" and "alt" attributes are
##            copied to the generated tag (empty when absent).
#macro ( image $value )
  ## Reset before reading the attributes. Under Velocity's default (shared)
  ## macro context, the dimensions/alignment of a previous image would
  ## otherwise be reused for an image that does not specify its own.
  #set ($width = "")
  #set ($height = "")
  #set ($align = "")
  #if ($value.getAttributeValue("width"))
    #set ($width=$value.getAttributeValue("width"))
  #end
  #if ($value.getAttributeValue("height"))
    #set ($height=$value.getAttributeValue("height"))
  #end
  #if ($value.getAttributeValue("align"))
    #set ($align=$value.getAttributeValue("align"))
  #end
  ## alt is passed through so the text alternative written in the source document is not lost.
  <img src="$relativePath$value.getAttributeValue("src")" alt="$!value.getAttributeValue("alt")" width="$!width" height="$!height" align="$!align">
#end
|
||||
|
||||
## source: renders a source-code listing inside a thin coloured frame.
##   $value - element whose text is escaped with $escape.getText() and placed
##            in a <pre> block.
## The frame is a 3x3 layout table whose outer cells hold 1x1 spacer images.
## Those images are purely decorative, so they carry an empty alt text.
#macro ( source $value)
<div align="left">
  <table cellspacing="4" cellpadding="0" border="0">
    <tr>
      <td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
      <td bgcolor="#023264" height="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
      <td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
    </tr>
    <tr>
      <td bgcolor="#023264" width="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
      ## Keep <pre> and its content on one line: whitespace inside <pre> is significant.
      <td bgcolor="#ffffff"><pre>$escape.getText($value.getText())</pre></td>
      <td bgcolor="#023264" width="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
    </tr>
    <tr>
      <td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
      <td bgcolor="#023264" height="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
      <td bgcolor="#023264" width="1" height="1"><img src="/images/void.gif" alt="" width="1" height="1" vspace="0" hspace="0" border="0"/></td>
    </tr>
  </table>
</div>
#end
|
||||
|
||||
## subsection: renders one subsection as a banner row followed by its content.
##   $subsection - subsection element; its "name" attribute is used both as
##                 the anchor name and as the visible heading.
## Children named "img", "source" and "table" are delegated to the macros of
## the same purpose; every other child is written out as-is.
#macro ( subsection $subsection)
<table border="0" cellspacing="0" cellpadding="2" width="100%">
  <tr><td bgcolor="$subbannerbg">
    ## "sans-serif" is the generic family name; the former "sanserif" matched no font.
    <font color="$subbannerfg" face="arial,helvetica,sans-serif">
      <a name="$subsection.getAttributeValue("name")"><strong>$subsection.getAttributeValue("name")</strong></a>
    </font>
  </td></tr>
  <tr><td>
    <blockquote>
    #foreach ( $items in $subsection.getChildren() )
      #if ($items.getName().equals("img"))
        #image ($items)
      #elseif ($items.getName().equals("source"))
        #source ($items)
      #elseif ($items.getName().equals("table"))
        #table ($items)
      #else
        $items
      #end
    #end
    </blockquote>
  </td></tr>
  <tr><td><br/></td></tr>
</table>
#end
|
||||
|
||||
## section: renders one top-level section as a banner row followed by its content.
##   $section - section element; its "name" attribute is used both as the
##              anchor name and as the visible heading.
## Children named "img", "source", "table" and "subsection" are delegated to
## the corresponding macros; every other child is written out as-is.
#macro ( section $section)
<table border="0" cellspacing="0" cellpadding="2" width="100%">
  <tr><td bgcolor="$bannerbg">
    ## "sans-serif" is the generic family name; the former "sanserif" matched no font.
    <font color="$bannerfg" face="arial,helvetica,sans-serif">
      <a name="$section.getAttributeValue("name")"><strong>$section.getAttributeValue("name")</strong></a>
    </font>
  </td></tr>
  <tr><td>
    <blockquote>
    #foreach ( $items in $section.getChildren() )
      #if ($items.getName().equals("img"))
        #image ($items)
      #elseif ($items.getName().equals("source"))
        #source ($items)
      #elseif ($items.getName().equals("table"))
        #table ($items)
      #elseif ($items.getName().equals("subsection"))
        #subsection ($items)
      #else
        $items
      #end
    #end
    </blockquote>
    ## The stray </p> that used to follow here had no opening <p> and has been removed.
  </td></tr>
  <tr><td><br/></td></tr>
</table>
#end
|
||||
|
||||
## makeProject: renders the navigation menus described by $project.
## For every "menu" child of the project's "body" element it emits the menu
## name in bold followed by a <ul> with one link per menu child; each link is
## produced by the "projectanchor" macro from the child's "name" and "href"
## attributes.
#macro ( makeProject )
|
||||
|
||||
<!-- ============================================================ -->
|
||||
|
||||
#set ($menus = $project.getChild("body").getChildren("menu"))
|
||||
#foreach ( $menu in $menus )
|
||||
<p><strong>$menu.getAttributeValue("name")</strong></p>
|
||||
<ul>
|
||||
#foreach ( $item in $menu.getChildren() )
|
||||
## The name is copied into $name first and then handed to projectanchor together with the href.
#set ($name = $item.getAttributeValue("name"))
|
||||
<li>#projectanchor($name $item.getAttributeValue("href"))</li>
|
||||
#end
|
||||
</ul>
|
||||
#end
|
||||
#end
|
||||
|
||||
## getProjectImage: renders the cells of the page-top logo row.
## When $project has a "logo" child, two cells are produced: the ASF logo on
## the left and the project logo (linked to the project's "href") on the
## right. Otherwise a single two-column cell with the ASF logo is produced.
#macro (getProjectImage)
  #if ($project.getChild("logo"))
    <td align="left">
      ## The logo is the only content of the link, so it needs a text alternative.
      <a href="http://www.apache.org"><img src="http://lucene.apache.org/java/docs/images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" border="0"/></a>
    </td>
    <td align="right">
      #set ( $logoString = $project.getChild("logo").getAttributeValue("href") )
      ## Insert a "/" between $relativePath and the logo path only when the path does not already start with one.
      #if ( $logoString.startsWith("/") )
        <a href="$project.getAttributeValue("href")"><img src="$relativePath$logoString" alt="$project.getChild("logo").getText()" border="0"/></a>
      #else
        <a href="$project.getAttributeValue("href")"><img src="$relativePath/$logoString" alt="$project.getChild("logo").getText()" border="0"/></a>
      #end
    </td>
  #else
    <td colspan="2">
      ## NOTE(review): this path is not prefixed with $relativePath, unlike the project logo above; confirm that is intended.
      <a href="http://www.apache.org"><img src="images/asf-logo.gif" alt="The Apache Software Foundation" width="387" height="100" align="left" border="0"/></a>
    </td>
  #end
#end
|
||||
|
||||
## printMeta: writes a single <meta ... /> tag for $metaElement.
##
## Every attribute returned by $metaElement.getAttributes() is copied into the
## tag as name="value". Values are written as-is; no escaping is applied here.
#macro (printMeta $metaElement)
|
||||
## The #set sits inside the still-open "<meta" tag, so the attribute loop on
## the following line prints its output within that same tag before "/>".
<meta #set ($attribs = $metaElement.getAttributes())
|
||||
#foreach ($a in $attribs) $a.getName()="$a.getValue()" #end />
|
||||
#end
|
||||
|
||||
## document: top-level page macro; emits the complete HTML page.
##
## Reads from the template context:
##   $root    - <properties> (author, title, optional base), the top-level
##              <meta> children, and the <section> children of <body>
##   $project - its <title> text here, plus whatever #getProjectImage and
##              #makeProject read from it
##   $bodybg, $bodyfg, $bodylink - colours for <body> and the footer text
##
## Layout: a banner table filled by #getProjectImage, then a two-column table
## with the navigation from #makeProject on the left (20%) and every <section>
## rendered through #section on the right (80%), then the copyright footer.
#macro (document)
<!-- ====================================================================== -->
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
<!-- Main Page Section -->
<!-- ====================================================================== -->
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>

    ## One author meta tag per <author> listed under <properties>.
    #set ($authors = $root.getChild("properties").getChildren("author"))
    #foreach ( $au in $authors )
      #metaauthor ( $au.getText() $au.getAttributeValue("email") )
    #end

    #set ($metas = $root.getChildren("meta"))

    ## Parse meta directives such as
    ## <meta name="keyword" content="jakarta, java"/>
    #foreach ($meta in $metas) #printMeta($meta) #end

    ## Support for <base> tags.
    #if ($root.getChild("properties").getChild("base"))
      #set ($url = $root.getChild("properties").getChild("base").getAttributeValue("href"))
      <base href="$url"/>
    #end

    <title>$project.getChild("title").getText() - $root.getChild("properties").getChild("title").getText()</title>
    <link rel="stylesheet" type="text/css" href="styles/lucene.css">
  </head>

  <body bgcolor="$bodybg" text="$bodyfg" link="$bodylink">
    <table border="0" width="100%" cellspacing="0">
      <!-- TOP IMAGE -->
      <tr>
        #getProjectImage()
      </tr>
    </table>
    <table border="0" width="100%" cellspacing="4">
      <tr><td colspan="2">
        ## noshade is a boolean attribute; its only valid value is its own name.
        <hr noshade="noshade" size="1"/>
      </td></tr>

      <tr>
        <!-- LEFT SIDE NAVIGATION -->
        ## nowrap is a boolean attribute; its only valid value is its own name.
        <td width="20%" valign="top" nowrap="nowrap">
          #makeProject()
        </td>
        <td width="80%" align="left" valign="top">
          #set ($allSections = $root.getChild("body").getChildren("section"))
          #foreach ( $section in $allSections )
            #section ($section)
          #end
        </td>
      </tr>

      <!-- FOOTER -->
      <tr><td colspan="2">
        <hr noshade="noshade" size="1"/>
      </td></tr>
      <tr><td colspan="2">
        <div align="center"><font color="$bodylink" size="-1"><em>
        ## The entity keeps the copyright sign independent of the template file's encoding.
        Copyright &copy; 1999-2005, The Apache Software Foundation
        </em></font></div>
      </td></tr>
    </table>
  </body>
</html>
#end
|
||||
|
||||
|
||||
|
||||
|
||||
|