mirror of https://github.com/apache/lucene.git
864 lines
35 KiB
HTML
Executable File
864 lines
35 KiB
HTML
Executable File
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<html>
|
|
<head>
|
|
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
<meta content="Apache Forrest" name="Generator">
|
|
<meta name="Forrest-version" content="0.9">
|
|
<meta name="Forrest-skin-name" content="lucene">
|
|
<title>Solr tutorial</title>
|
|
<link type="text/css" href="skin/basic.css" rel="stylesheet">
|
|
<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
|
|
<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
|
|
<link type="text/css" href="skin/profile.css" rel="stylesheet">
|
|
<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
|
|
<link rel="shortcut icon" href="images/favicon.ico">
|
|
</head>
|
|
<body onload="init()">
|
|
<script type="text/javascript">ndeSetTextSize();</script>
|
|
<div id="top">
|
|
<!--+
|
|
|breadtrail
|
|
+-->
|
|
<div class="breadtrail">
|
|
<a href="http://www.apache.org/">apache</a> &gt; <a href="http://lucene.apache.org/">lucene</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
|
|
</div>
|
|
<!--+
|
|
|header
|
|
+-->
|
|
<div class="header">
|
|
<!--+
|
|
|start group logo
|
|
+-->
|
|
<div class="grouplogo">
|
|
<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
|
|
</div>
|
|
<!--+
|
|
|end group logo
|
|
+-->
|
|
<!--+
|
|
|start Project Logo
|
|
+-->
|
|
<div class="projectlogo">
|
|
<a href="http://lucene.apache.org/solr/"><img class="logoImage" alt="Solr" src="images/solr.jpg" title="Solr Description"></a>
|
|
</div>
|
|
<!--+
|
|
|end Project Logo
|
|
+-->
|
|
<!--+
|
|
|start Search
|
|
+-->
|
|
<div class="searchbox">
|
|
<form id="searchform" action="http://search.lucidimagination.com/p:solr" method="get" class="roundtopsmall">
|
|
<input onFocus="getBlank (this, 'Search the site with Solr');" size="25" name="q" id="query" type="text" value="Search the site with Solr">
|
|
<input onclick="selectProvider(this.form)" name="Search" value="Search" type="submit">
|
|
@
|
|
<select id="searchProvider" name="searchProvider"><option value="any">select provider</option><option value="lucid">Lucid Find</option><option value="sl">Search-Lucene</option></select>
|
|
</form>
|
|
</div>
|
|
<!--+
|
|
|end search
|
|
+-->
|
|
<!--+
|
|
|start Tabs
|
|
+-->
|
|
<ul id="tabs">
|
|
<li class="current">
|
|
<a class="selected" href="index.html">Main</a>
|
|
</li>
|
|
<li>
|
|
<a class="unselected" href="http://wiki.apache.org/solr/FrontPage">Wiki</a>
|
|
</li>
|
|
</ul>
|
|
<!--+
|
|
|end Tabs
|
|
+-->
|
|
</div>
|
|
</div>
|
|
<div id="main">
|
|
<div id="publishedStrip">
|
|
<!--+
|
|
|start Subtabs
|
|
+-->
|
|
<div id="level2tabs"></div>
|
|
<!--+
|
|
|end Endtabs
|
|
+-->
|
|
<script type="text/javascript"><!--
|
|
document.write("Last Published: " + document.lastModified);
|
|
// --></script>
|
|
</div>
|
|
<!--+
|
|
|breadtrail
|
|
+-->
|
|
<div class="breadtrail">
|
|
|
|
|
|
</div>
|
|
<!--+
|
|
|start Menu, mainarea
|
|
+-->
|
|
<!--+
|
|
|start Menu
|
|
+-->
|
|
<div id="menu">
|
|
<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">About</div>
|
|
<div id="menu_1.1" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="index.html" title="Welcome to Solr">Welcome</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="http://lucene.apache.org/java/docs/whoweare.html" title="Lucene/Solr Committers">Who We Are</a>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
|
|
<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
|
|
<div class="menuitem">
|
|
<a href="features.html">Features</a>
|
|
</div>
|
|
<div class="menupage">
|
|
<div class="menupagetitle">Tutorial</div>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="http://wiki.apache.org/solr/FrontPage">Docs (Wiki)</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="http://wiki.apache.org/solr/FAQ">FAQ</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="api/index.html">All Javadoc</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="api/solrj/index.html">SolrJ Javadoc</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="api/test-framework/index.html">Test Framework Javadoc</a>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Resources</div>
|
|
<div id="menu_1.3" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="http://www.apache.org/dyn/closer.cgi/lucene/solr/">Download</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="mailing_lists.html">Mailing Lists</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="issue_tracking.html">Issue Tracking</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="version_control.html">Version Control</a>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
|
|
<div id="menu_1.4" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="http://lucene.apache.org/java/">Lucene Java</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="http://lucene.apache.org/nutch/">Nutch</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="http://incubator.apache.org/connectors/">ManifoldCF</a>
|
|
</div>
|
|
</div>
|
|
<script type="text/javascript">
|
|
/**
 * Click handler for the search form's submit button: points the form at the
 * chosen search provider and remembers the choice in a cookie.
 *
 * If the placeholder option ("any") is still selected, one of the two
 * providers is picked at random. The form's action is then set to the
 * matching search URL, and the provider is stored in a "searchProvider"
 * cookie (path "/") that expires one year from now.
 *
 * @param {HTMLFormElement} form  the search form; it must contain a control
 *                                named "searchProvider"
 */
function selectProvider(form) {
  // All working values are declared locally so nothing leaks into the
  // global scope (the undeclared assignments would also throw in strict mode).
  var provider = form.elements['searchProvider'].value;
  if (provider == "any") {
    provider = (Math.random() > 0.5) ? "lucid" : "sl";
  }

  if (provider == "lucid") {
    form.action = "http://search.lucidimagination.com/p:solr";
  } else if (provider == "sl") {
    form.action = "http://search-lucene.com/solr";
  }
  // Any other value leaves the form's existing action untouched.

  var days = 365; // cookie will be valid for a year
  var date = new Date();
  date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
  // toUTCString() is the standard replacement for the deprecated toGMTString().
  var expires = "; expires=" + date.toUTCString();
  // Encode the value so it can never break the cookie syntax; the current
  // provider ids ("lucid", "sl") are unchanged by encoding.
  document.cookie = "searchProvider=" + encodeURIComponent(provider) + expires + "; path=/";
}
|
|
|
|
/**
 * Returns the decoded value of the "searchProvider" cookie found in a
 * document.cookie-style string, or null when no such cookie is present.
 *
 * The string is split into its individual "name=value" pairs and the name is
 * matched exactly, so a cookie whose name merely ends in "searchProvider"
 * (for example "mysearchProvider") is not mistaken for it.
 *
 * @param {string} cookieString  "name=value; name2=value2" cookie list
 * @returns {?string} the cookie's value, or null if it is not set
 */
function readSearchProviderCookie(cookieString) {
  var prefix = "searchProvider=";
  var pairs = cookieString ? cookieString.split(";") : [];
  for (var i = 0; i < pairs.length; i++) {
    var pair = pairs[i].replace(/^\s+/, ""); // pairs are separated by "; "
    if (pair.indexOf(prefix) === 0) {
      var raw = pair.substring(prefix.length);
      try {
        return decodeURIComponent(raw);
      } catch (e) {
        // Malformed percent-escape: fall back to the raw text.
        return raw;
      }
    }
  }
  return null;
}

// On page load, pre-select the provider remembered from a previous search.
// Wrapped in a function so the temporaries do not become globals.
(function () {
  if (typeof document === "undefined") {
    return;
  }
  var saved = readSearchProviderCookie(document.cookie);
  var searchForm = document.forms['searchform'];
  if (saved !== null && searchForm) {
    searchForm.elements['searchProvider'].value = saved;
  }
})();
|
|
</script>
|
|
<div id="credit"></div>
|
|
<div id="roundbottom">
|
|
<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
|
|
<!--+
|
|
|alternative credits
|
|
+-->
|
|
<div id="credit2">
|
|
<a href="http://www.packtpub.com/solr-1-4-enterprise-search-server?utm_source=http%3A%2F%2Flucene.apache.org%2Fsolr%2F&utm_medium=spons&utm_content=pod&utm_campaign=mdb_000275"><img border="0" title="Book: Solr 1.4 Enterprise Search Server" alt="Book: Solr 1.4 Enterprise Search Server - logo" src="images/solr-book-image.jpg" style="width: 150px;height: 185px;"></a><a href="http://www.lucidimagination.com/Downloads/LucidWorks-for-Solr/Reference-Guide?sc=AP"><img border="0" title="LucidWorks for Solr Certified Distribution Reference Guide" alt="LucidWorks for Solr Certified Distribution Reference Guide - logo" src="images/lucidworks_reference_guide.png" style="width: 150px;height: 185px;"></a>
|
|
</div>
|
|
</div>
|
|
<!--+
|
|
|end Menu
|
|
+-->
|
|
<!--+
|
|
|start content
|
|
+-->
|
|
<div id="content">
|
|
<div title="Portable Document Format" class="pdflink">
|
|
<a class="dida" href="tutorial.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
|
|
PDF</a>
|
|
</div>
|
|
<h1>Solr tutorial</h1>
|
|
<div id="minitoc-area">
|
|
<ul class="minitoc">
|
|
<li>
|
|
<a href="#Overview">Overview</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Requirements">Requirements</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Getting+Started">Getting Started</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Indexing+Data">Indexing Data</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Updating+Data">Updating Data</a>
|
|
<ul class="minitoc">
|
|
<li>
|
|
<a href="#Deleting+Data">Deleting Data</a>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<a href="#Querying+Data">Querying Data</a>
|
|
<ul class="minitoc">
|
|
<li>
|
|
<a href="#Sorting">Sorting</a>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<a href="#Highlighting">Highlighting</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Faceted+Search">Faceted Search</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Search+UI">Search UI</a>
|
|
</li>
|
|
<li>
|
|
<a href="#Text+Analysis">Text Analysis</a>
|
|
<ul class="minitoc">
|
|
<li>
|
|
<a href="#Analysis+Debugging">Analysis Debugging</a>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<a href="#Conclusion">Conclusion</a>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
|
|
|
|
<a name="N1000E"></a><a name="Overview"></a>
|
|
<h2 class="boxed">Overview</h2>
|
|
<div class="section">
|
|
<p>
|
|
This document covers the basics of running Solr using an example
|
|
schema, and some sample data.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<a name="N10018"></a><a name="Requirements"></a>
|
|
<h2 class="boxed">Requirements</h2>
|
|
<div class="section">
|
|
<p>
|
|
To follow along with this tutorial, you will need...
|
|
</p>
|
|
<ol>
|
|
|
|
<li>Java 1.6 or greater. Some places you can get it are from
|
|
<a href="http://www.oracle.com/technetwork/java/javase/downloads/index.html">Oracle</a>,
|
|
<a href="http://openjdk.java.net/">Open JDK</a>,
|
|
or <a href="http://www.ibm.com/developerworks/java/jdk/">IBM</a>.
|
|
<br>
|
|
Running <span class="codefrag">java -version</span> at the command line should indicate a version
|
|
number starting with 1.6. GNU's GCJ is not supported and does not work with Solr.
|
|
</li>
|
|
|
|
<li>A <a href="http://www.apache.org/dyn/closer.cgi/lucene/solr/">Solr release</a>.
|
|
</li>
|
|
|
|
</ol>
|
|
</div>
|
|
|
|
|
|
<a name="N10040"></a><a name="Getting+Started"></a>
|
|
<h2 class="boxed">Getting Started</h2>
|
|
<div class="section">
|
|
<p>
|
|
<strong>
|
|
Please run the browser showing this tutorial and the Solr server on the same machine so tutorial links will correctly point to your Solr server.
|
|
</strong>
|
|
</p>
|
|
<p>
|
|
Begin by unzipping the Solr release and changing your working directory
|
|
to be the "<span class="codefrag">example</span>" directory. (Note that the base directory name may vary with the version of Solr downloaded.) For example, with a shell in UNIX, Cygwin, or MacOS:
|
|
</p>
|
|
<pre class="code">
|
|
user:~solr$ <strong>ls</strong>
|
|
solr-nightly.zip
|
|
user:~solr$ <strong>unzip -q solr-nightly.zip</strong>
|
|
user:~solr$ <strong>cd solr-nightly/example/</strong>
|
|
|
|
</pre>
|
|
<p>
|
|
Solr can run in any Java Servlet Container of your choice, but to simplify
|
|
this tutorial, the example index includes a small installation of Jetty.
|
|
</p>
|
|
<p>
|
|
To launch Jetty with the Solr WAR, and the example configs, just run the <span class="codefrag">start.jar</span> ...
|
|
</p>
|
|
<pre class="code">
|
|
user:~/solr/example$ <strong>java -jar start.jar</strong>
|
|
2009-10-23 16:42:53.816::INFO: Logging to STDERR via org.mortbay.log.StdErrLog
|
|
2009-10-23 16:42:53.907::INFO: jetty-6.1.26
|
|
|
|
...
|
|
|
|
Oct 23, 2009 4:41:56 PM org.apache.solr.core.SolrCore registerSearcher
|
|
INFO: [] Registered new searcher Searcher@7c3885 main
|
|
|
|
</pre>
|
|
<p>
|
|
This will start up the Jetty application server on port 8983, and use your terminal to display the logging information from Solr.
|
|
</p>
|
|
<p>
|
|
You can see that Solr is running by loading <a href="http://localhost:8983/solr/admin/">http://localhost:8983/solr/admin/</a> in your web browser. This is the main starting point for administering Solr.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<a name="N10078"></a><a name="Indexing+Data"></a>
|
|
<h2 class="boxed">Indexing Data</h2>
|
|
<div class="section">
|
|
<p>
|
|
Your Solr server is up and running, but it doesn't contain any data. You can
|
|
modify a Solr index by POSTing XML Documents containing instructions to add (or
|
|
update) documents, delete documents, commit pending adds and deletes, and
|
|
optimize your index.
|
|
</p>
|
|
<p>
|
|
The <span class="codefrag">exampledocs</span> directory contains samples of the types of
|
|
instructions Solr expects, as well as a java utility for posting them from the
|
|
command line (a <span class="codefrag">post.sh</span> shell script is also available, but for
|
|
this tutorial we'll use the cross-platform Java client). </p>
|
|
<p> To try this,
|
|
open a new terminal window, enter the exampledocs directory, and run
|
|
"<span class="codefrag">java -jar post.jar</span>" on some of the XML files in that directory,
|
|
indicating the URL of the Solr server:
|
|
</p>
|
|
<pre class="code">
|
|
user:~/solr/example/exampledocs$ <strong>java -jar post.jar solr.xml monitor.xml</strong>
|
|
SimplePostTool: version 1.2
|
|
SimplePostTool: WARNING: Make sure your XML documents are encoded in UTF-8, other encodings are not currently supported
|
|
SimplePostTool: POSTing files to http://localhost:8983/solr/update..
|
|
SimplePostTool: POSTing file solr.xml
|
|
SimplePostTool: POSTing file monitor.xml
|
|
SimplePostTool: COMMITting Solr index changes..
|
|
</pre>
|
|
<p>
|
|
You have now indexed two documents in Solr, and committed these changes.
|
|
You can now search for "solr" using the "Make a Query" interface on the Admin screen, and you should get one result.
|
|
Clicking the "Search" button should take you to the following URL...
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/select/?q=solr&start=0&rows=10&indent=on">http://localhost:8983/solr/select/?q=solr&start=0&rows=10&indent=on</a>
|
|
|
|
</p>
|
|
<p>
|
|
You can index all of the sample data, using the following command (assuming your command line shell supports the *.xml notation):
|
|
</p>
|
|
<pre class="code">
|
|
user:~/solr/example/exampledocs$ <strong>java -jar post.jar *.xml</strong>
|
|
SimplePostTool: version 1.2
|
|
SimplePostTool: WARNING: Make sure your XML documents are encoded in UTF-8, other encodings are not currently supported
|
|
SimplePostTool: POSTing files to http://localhost:8983/solr/update..
|
|
SimplePostTool: POSTing file hd.xml
|
|
SimplePostTool: POSTing file ipod_other.xml
|
|
SimplePostTool: POSTing file ipod_video.xml
|
|
SimplePostTool: POSTing file mem.xml
|
|
SimplePostTool: POSTing file monitor.xml
|
|
SimplePostTool: POSTing file monitor2.xml
|
|
SimplePostTool: POSTing file mp500.xml
|
|
SimplePostTool: POSTing file sd500.xml
|
|
SimplePostTool: POSTing file solr.xml
|
|
SimplePostTool: POSTing file spellchecker.xml
|
|
SimplePostTool: POSTing file utf8-example.xml
|
|
SimplePostTool: POSTing file vidcard.xml
|
|
SimplePostTool: COMMITting Solr index changes..
|
|
</pre>
|
|
<p>
|
|
...and now you can search for all sorts of things using the default <a href="http://wiki.apache.org/solr/SolrQuerySyntax">Solr Query Syntax</a> (a superset of the Lucene query syntax)...
|
|
</p>
|
|
<ul>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?q=video">video</a>
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?q=name:video">name:video</a>
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?q=%2Bvideo+%2Bprice%3A[*+TO+400]">+video +price:[* TO 400]</a>
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
<p></p>
|
|
<p>
|
|
There are many other different ways to import your data into Solr... one can
|
|
</p>
|
|
<ul>
|
|
|
|
<li>Import records from a database using the
|
|
<a href="http://wiki.apache.org/solr/DataImportHandler">Data Import Handler (DIH)</a>.
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://wiki.apache.org/solr/UpdateCSV">Load a CSV file</a> (comma separated values),
|
|
including those exported by Excel or MySQL.
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://wiki.apache.org/solr/UpdateJSON">POST JSON documents</a>
|
|
|
|
</li>
|
|
|
|
<li>Index binary documents such as Word and PDF with
|
|
<a href="http://wiki.apache.org/solr/ExtractingRequestHandler">Solr Cell</a> (ExtractingRequestHandler).
|
|
</li>
|
|
|
|
<li>
|
|
Use <a href="http://wiki.apache.org/solr/Solrj">SolrJ</a> for Java or other Solr clients to
|
|
programmatically create documents to send to Solr.
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<a name="N100EE"></a><a name="Updating+Data"></a>
|
|
<h2 class="boxed">Updating Data</h2>
|
|
<div class="section">
|
|
<p>
|
|
You may have noticed that even though the file <span class="codefrag">solr.xml</span> has now
|
|
been POSTed to the server twice, you still only get 1 result when searching for
|
|
"solr". This is because the example <span class="codefrag">schema.xml</span> specifies a "<span class="codefrag">uniqueKey</span>" field
|
|
called "<span class="codefrag">id</span>". Whenever you POST instructions to Solr to add a
|
|
document with the same value for the <span class="codefrag">uniqueKey</span> as an existing document, it
|
|
automatically replaces it for you. You can see that that has happened by
|
|
looking at the values for <span class="codefrag">numDocs</span> and <span class="codefrag">maxDoc</span> in the
|
|
"CORE"/searcher section of the statistics page... </p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/admin/stats.jsp">http://localhost:8983/solr/admin/stats.jsp</a>
|
|
|
|
</p>
|
|
<p>
|
|
|
|
<strong><span class="codefrag">numDocs</span></strong> represents the number of searchable documents in the
|
|
index (and will be larger than the number of XML files since some files
|
|
contained more than one <span class="codefrag">&lt;doc&gt;</span>). <strong><span class="codefrag">maxDoc</span></strong>
|
|
may be larger as the <span class="codefrag">maxDoc</span> count includes logically deleted documents that
|
|
have not yet been removed from the index. You can re-post the sample XML
|
|
files over and over again as much as you want and <span class="codefrag">numDocs</span> will never
|
|
increase, because the new documents will constantly be replacing the old.
|
|
</p>
|
|
<p>
|
|
Go ahead and edit the existing XML files to change some of the data, and re-run
|
|
the <span class="codefrag">java -jar post.jar</span> command; you'll see your changes reflected
|
|
in subsequent searches.
|
|
</p>
|
|
<a name="N1012D"></a><a name="Deleting+Data"></a>
|
|
<h3 class="boxed">Deleting Data</h3>
|
|
<p>You can delete data by POSTing a delete command to the update URL and specifying the value
|
|
of the document's unique key field, or a query that matches multiple documents (be careful with that one!). Since these commands
|
|
are smaller, we will specify them right on the command line rather than reference an XML file.
|
|
</p>
|
|
<p>Execute the following command to delete a document</p>
|
|
<pre class="code">java -Ddata=args -Dcommit=no -jar post.jar "&lt;delete&gt;&lt;id&gt;SP2514N&lt;/id&gt;&lt;/delete&gt;"</pre>
|
|
<p>Now if you go to the <a href="http://localhost:8983/solr/admin/stats.jsp">statistics</a> page and scroll down
|
|
to the UPDATE_HANDLERS section, you should see "<span class="codefrag">deletesById : 1</span>".</p>
|
|
<p>If you search for <a href="http://localhost:8983/solr/select?q=id:SP2514N">id:SP2514N</a> it will still be found,
|
|
because index changes are not visible until changes are committed and a new searcher is opened. To cause
|
|
this to happen, send a commit command to Solr (post.jar does this for you by default):</p>
|
|
<pre class="code">java -jar post.jar</pre>
|
|
<p>Now re-execute the previous search and verify that no matching documents are found. Also revisit the
|
|
statistics page and observe the changes in both the UPDATE_HANDLERS section and the CORE section.</p>
|
|
<p>Here is an example of using delete-by-query to delete anything with
|
|
<a href="http://localhost:8983/solr/select?q=name:DDR&fl=name">DDR</a> in the name:</p>
|
|
<pre class="code">java -Ddata=args -jar post.jar "&lt;delete&gt;&lt;query&gt;name:DDR&lt;/query&gt;&lt;/delete&gt;"</pre>
|
|
<p>Commit can be an expensive operation so it's best to make many changes to an index in a batch and
|
|
then send the <span class="codefrag">commit</span> command at the end. There is also an <span class="codefrag">optimize</span> command that does the same thing as <span class="codefrag">commit</span>,
|
|
in addition to merging all index segments into a single segment, making it faster to search and causing any
|
|
deleted documents to be removed. All of the update commands are documented <a href="http://wiki.apache.org/solr/UpdateXmlMessages">here</a>.
|
|
</p>
|
|
<p>To continue with the tutorial, re-add any documents you may have deleted by going to the <span class="codefrag">exampledocs</span> directory and executing</p>
|
|
<pre class="code">java -jar post.jar *.xml</pre>
|
|
</div>
|
|
|
|
|
|
<a name="N1017C"></a><a name="Querying+Data"></a>
|
|
<h2 class="boxed">Querying Data</h2>
|
|
<div class="section">
|
|
<p>
|
|
Searches are done via HTTP GET on the <span class="codefrag">select</span> URL with the query string in the <span class="codefrag">q</span> parameter.
|
|
You can pass a number of optional <a href="http://wiki.apache.org/solr/StandardRequestHandler">request parameters</a>
|
|
to the request handler to control what information is returned. For example, you can use the "<span class="codefrag">fl</span>" parameter
|
|
to control what stored fields are returned, and if the relevancy score is returned:
|
|
</p>
|
|
<ul>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&fl=name,id">q=video&fl=name,id</a> (return only name and id fields) </li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&fl=name,id,score">q=video&fl=name,id,score</a> (return relevancy score as well) </li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&fl=*,score">q=video&fl=*,score</a> (return all stored fields, as well as relevancy score) </li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&sort=price+desc&fl=name,id,price">q=video&sort=price desc&fl=name,id,price</a> (add sort specification: sort by price descending) </li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&wt=json">q=video&wt=json</a> (return response in JSON format) </li>
|
|
|
|
</ul>
|
|
<p>
|
|
Solr provides a <a href="http://localhost:8983/solr/admin/form.jsp">query form</a> within the web admin interface
|
|
that allows setting the various request parameters and is useful when testing or debugging queries.
|
|
</p>
|
|
<a name="N101BA"></a><a name="Sorting"></a>
|
|
<h3 class="boxed">Sorting</h3>
|
|
<p>
|
|
Solr provides a simple method to sort on one or more indexed fields.
|
|
Use the "<span class="codefrag">sort</span>" parameter to specify "field direction" pairs, separated by commas if there's more than one sort field:
|
|
</p>
|
|
<ul>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&sort=price+desc">q=video&sort=price desc</a>
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&sort=price+asc">q=video&sort=price asc</a>
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&sort=inStock+asc,price+desc">q=video&sort=inStock asc, price desc</a>
|
|
</li>
|
|
|
|
</ul>
|
|
<p>
|
|
"<span class="codefrag">score</span>" can also be used as a field name when specifying a sort:
|
|
</p>
|
|
<ul>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&sort=score+desc">q=video&sort=score desc</a>
|
|
</li>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=video&sort=inStock+asc,score+desc">q=video&sort=inStock asc, score desc</a>
|
|
</li>
|
|
|
|
</ul>
|
|
<p>
|
|
Complex functions may also be used to sort results:
|
|
</p>
|
|
<ul>
|
|
|
|
<li>
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=*:*&sort=div(popularity,add(price,1))+desc">q=*:*&sort=div(popularity,add(price,1)) desc</a>
|
|
</li>
|
|
|
|
</ul>
|
|
<p>
|
|
If no sort is specified, the default is <span class="codefrag">score desc</span> to return the matches having the highest relevancy.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
|
|
<a name="N101FE"></a><a name="Highlighting"></a>
|
|
<h2 class="boxed">Highlighting</h2>
|
|
<div class="section">
|
|
<p>
|
|
Hit highlighting returns relevant snippets of each returned document, and highlights
|
|
terms from the query within those context snippets.
|
|
</p>
|
|
<p>
|
|
The following example searches for <span class="codefrag">video card</span> and requests
|
|
highlighting on the fields <span class="codefrag">name,features</span>. This causes a
|
|
<span class="codefrag">highlighting</span> section to be added to the response with the
|
|
words to highlight surrounded with <span class="codefrag">&lt;em&gt;</span> (for emphasis)
|
|
tags.
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/select/?wt=json&indent=on&q=video+card&fl=name,id&hl=true&hl.fl=name,features">...&q=video card&fl=name,id&hl=true&hl.fl=name,features</a>
|
|
|
|
</p>
|
|
<p>
|
|
More request parameters related to controlling highlighting may be found
|
|
<a href="http://wiki.apache.org/solr/HighlightingParameters">here</a>.
|
|
</p>
|
|
</div> <!-- highlighting -->
|
|
|
|
|
|
|
|
<a name="N10227"></a><a name="Faceted+Search"></a>
|
|
<h2 class="boxed">Faceted Search</h2>
|
|
<div class="section">
|
|
<p>
|
|
Faceted search takes the documents matched by a query and generates counts for various
|
|
properties or categories. Links are usually provided that allow users to "drill down" or
|
|
refine their search results based on the returned categories.
|
|
</p>
|
|
<p>
|
|
The following example searches for all documents (<span class="codefrag">*:*</span>) and
|
|
requests counts by the category field <span class="codefrag">cat</span>.
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/select/?wt=json&indent=on&q=*:*&fl=name&facet=true&facet.field=cat">...&q=*:*&facet=true&facet.field=cat</a>
|
|
|
|
</p>
|
|
<p>
|
|
Notice that although only the first 10 documents are returned in the results list,
|
|
the facet counts generated are for the complete set of documents that match the query.
|
|
</p>
|
|
<p>
|
|
We can facet multiple ways at the same time. The following example adds a facet on the
|
|
boolean <span class="codefrag">inStock</span> field:
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/select/?wt=json&indent=on&q=*:*&fl=name&facet=true&facet.field=cat&facet.field=inStock">...&q=*:*&facet=true&facet.field=cat&facet.field=inStock</a>
|
|
|
|
</p>
|
|
<p>
|
|
Solr can also generate counts for arbitrary queries. The following example
|
|
queries for <span class="codefrag">ipod</span> and shows prices below and above 100 by using
|
|
range queries on the price field.
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/select/?wt=json&indent=on&q=ipod&fl=name&facet=true&facet.query=price:[0+TO+100]&facet.query=price:[100+TO+*]">...&q=ipod&facet=true&facet.query=price:[0 TO 100]&facet.query=price:[100 TO *]</a>
|
|
|
|
</p>
|
|
<p>
|
|
One can even facet by date ranges. This example requests counts for the manufacture date (<span class="codefrag">manufacturedate_dt</span> field) for each year between 2004 and 2010.
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/select/?wt=json&indent=on&q=*:*&fl=name,manufacturedate_dt&facet=true&facet.date=manufacturedate_dt&facet.date.start=2004-01-01T00:00:00Z&facet.date.end=2010-01-01T00:00:00Z&facet.date.gap=%2b1YEAR">...&q=*:*&facet=true&facet.date=manufacturedate_dt&facet.date.start=2004-01-01T00:00:00Z&facet.date.end=2010-01-01T00:00:00Z&facet.date.gap=+1YEAR</a>
|
|
|
|
</p>
|
|
<p>
|
|
More information on faceted search may be found on the
|
|
<a href="http://wiki.apache.org/solr/SolrFacetingOverview">faceting overview</a>
|
|
and
|
|
<a href="http://wiki.apache.org/solr/SimpleFacetParameters">faceting parameters</a>
|
|
pages.
|
|
</p>
|
|
</div> <!-- faceted search -->
|
|
|
|
|
|
|
|
<a name="N10278"></a><a name="Search+UI"></a>
|
|
<h2 class="boxed">Search UI</h2>
|
|
<div class="section">
|
|
<p>
|
|
Solr includes an example search interface built with velocity templating
|
|
that demonstrates many features, including searching, faceting, highlighting,
|
|
autocomplete, and geospatial searching.
|
|
</p>
|
|
<p>
|
|
Try it out at
|
|
<a href="http://localhost:8983/solr/browse">http://localhost:8983/solr/browse</a>
|
|
|
|
</p>
|
|
</div> <!-- Search UI -->
|
|
|
|
|
|
|
|
|
|
<a name="N1028B"></a><a name="Text+Analysis"></a>
|
|
<h2 class="boxed">Text Analysis</h2>
|
|
<div class="section">
|
|
<p>
|
|
Text fields are typically indexed by breaking the text into words and applying various transformations such as
|
|
lowercasing, removing plurals, or stemming to increase relevancy. The same text transformations are normally
|
|
applied to any queries in order to match what is indexed.
|
|
</p>
|
|
<p>
|
|
The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
|
|
the fields in the index and what type of analysis is applied to them. The current schema your server is using
|
|
may be accessed via the <span class="codefrag">[SCHEMA]</span> link on the <a href="http://localhost:8983/solr/admin/">admin</a> page.
|
|
</p>
|
|
<p>
|
|
The best analysis components (tokenization and filtering) for your textual content depend heavily on language.
|
|
As you can see in the above <span class="codefrag">[SCHEMA]</span> link, the fields in the example schema are using a <span class="codefrag">fieldType</span>
|
|
named <span class="codefrag">text_general</span>, which has defaults appropriate for all languages.
|
|
</p>
|
|
<p>
|
|
If you know your textual content is English, as is the case for the example documents in this tutorial,
|
|
and you'd like to apply English-specific stemming and stop word removal, as well as split compound words, you can use the <span class="codefrag">text_en_splitting</span> fieldType instead.
|
|
Go ahead and edit the <span class="codefrag">schema.xml</span> under the <span class="codefrag">solr/example/solr/conf</span> directory,
|
|
and change the <span class="codefrag">type</span> for fields <span class="codefrag">text</span> and <span class="codefrag">features</span> from <span class="codefrag">text_general</span> to <span class="codefrag">text_en_splitting</span>.
|
|
Restart the server and then re-post all of the documents, and then these queries will show the English-specific transformations:
|
|
</p>
|
|
<ul>
|
|
|
|
<li>A search for
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=power-shot&fl=name">power-shot</a>
|
|
matches <span class="codefrag">PowerShot</span>, and
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=adata&fl=name">adata</a>
|
|
matches <span class="codefrag">A-DATA</span> due to the use of <span class="codefrag">WordDelimiterFilter</span> and <span class="codefrag">LowerCaseFilter</span>.
|
|
</li>
|
|
|
|
|
|
<li>A search for
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=features:recharging&fl=name,features">features:recharging</a>
|
|
matches <span class="codefrag">Rechargeable</span> due to stemming with the <span class="codefrag">EnglishPorterFilter</span>.
|
|
</li>
|
|
|
|
|
|
<li>A search for
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=%221+gigabyte%22&fl=name">"1 gigabyte"</a>
|
|
matches things with <span class="codefrag">GB</span>, and the misspelled
|
|
<a href="http://localhost:8983/solr/select/?indent=on&q=pixima&fl=name">pixima</a>
|
|
matches <span class="codefrag">Pixma</span> due to the use of a <span class="codefrag">SynonymFilter</span>.
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
<p>A full description of the analysis components, Analyzers, Tokenizers, and TokenFilters
|
|
available for use is <a href="http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters">here</a>.
|
|
</p>
|
|
<a name="N1030B"></a><a name="Analysis+Debugging"></a>
|
|
<h3 class="boxed">Analysis Debugging</h3>
|
|
<p>There is a handy <a href="http://localhost:8983/solr/admin/analysis.jsp">analysis</a>
|
|
debugging page where you can see how a text value is broken down into words,
|
|
and see the resulting tokens after they pass through each filter in the chain.
|
|
</p>
|
|
<p>
|
|
|
|
<a href="http://localhost:8983/solr/admin/analysis.jsp?name=name&val=Canon+Power-Shot+SD500">This</a>
|
|
shows how "<span class="codefrag">Canon Power-Shot SD500</span>" would be indexed as a value in the name field. Each row of
|
|
the table shows the resulting tokens after having passed through the next <span class="codefrag">TokenFilter</span> in the analyzer for the <span class="codefrag">name</span> field.
|
|
Notice how both <span class="codefrag">powershot</span> and <span class="codefrag">power</span>, <span class="codefrag">shot</span> are indexed. Tokens generated at the same position
|
|
are shown in the same column, in this case <span class="codefrag">shot</span> and <span class="codefrag">powershot</span>.
|
|
</p>
|
|
<p>Selecting <a href="http://localhost:8983/solr/admin/analysis.jsp?name=name&verbose=on&val=Canon+Power-Shot+SD500">verbose output</a>
|
|
will show more details, such as the name of each analyzer component in the chain, token positions, and the start and end positions
|
|
of the token in the original text.
|
|
</p>
|
|
<p>Selecting <a href="http://localhost:8983/solr/admin/analysis.jsp?name=name&highlight=on&val=Canon+Power-Shot+SD500&qval=Powershot+sd-500">highlight matches</a>
|
|
when both index and query values are provided will take the resulting terms from the query value and highlight
|
|
all matches in the index value analysis.
|
|
</p>
|
|
<p>
|
|
<a href="http://localhost:8983/solr/admin/analysis.jsp?name=text&highlight=on&val=Four+score+and+seven+years+ago+our+fathers+brought+forth+on+this+continent+a+new+nation%2C+conceived+in+liberty+and+dedicated+to+the+proposition+that+all+men+are+created+equal.+&qval=liberties+and+equality">Here</a>
|
|
is an example of stemming and stop-words at work.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<a name="N1034D"></a><a name="Conclusion"></a>
|
|
<h2 class="boxed">Conclusion</h2>
|
|
<div class="section">
|
|
<p>
|
|
Congratulations! You successfully ran a small Solr instance, added some
|
|
documents, and made changes to the index and schema. You learned about queries, text
|
|
analysis, and the Solr admin interface. You're ready to start using Solr on
|
|
your own project! Continue on with the following steps:
|
|
</p>
|
|
<ul>
|
|
|
|
<li>Subscribe to the Solr <a href="mailing_lists.html">mailing lists</a>!</li>
|
|
|
|
<li>Make a copy of the Solr <span class="codefrag">example</span> directory as a template for your project.</li>
|
|
|
|
<li>Customize the schema and other config in <span class="codefrag">solr/conf/</span> to meet your needs.</li>
|
|
|
|
</ul>
|
|
<p>
|
|
Solr has a ton of other features that we haven't touched on here, including
|
|
<a href="http://wiki.apache.org/solr/DistributedSearch">distributed search</a>
|
|
to handle huge document collections,
|
|
<a href="http://wiki.apache.org/solr/FunctionQuery">function queries</a>,
|
|
<a href="http://wiki.apache.org/solr/StatsComponent">numeric field statistics</a>,
|
|
and
|
|
<a href="http://wiki.apache.org/solr/ClusteringComponent">search results clustering</a>.
|
|
Explore the <a href="http://wiki.apache.org/solr/FrontPage">Solr Wiki</a> to find
|
|
more details about Solr's many <a href="features.html">features</a>.
|
|
</p>
|
|
<p>
|
|
Have fun, and we'll see you on the Solr mailing lists!
|
|
</p>
|
|
</div>
|
|
|
|
|
|
</div>
|
|
<!--+
|
|
|end content
|
|
+-->
|
|
<div class="clearboth"> </div>
|
|
</div>
|
|
<div id="footer">
|
|
<!--+
|
|
|start bottomstrip
|
|
+-->
|
|
<div class="lastmodified">
|
|
<script type="text/javascript"><!--
|
|
document.write("Last Published: " + document.lastModified);
|
|
// --></script>
|
|
</div>
|
|
<div class="copyright">
|
|
Copyright ©
|
|
2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
|
|
</div>
|
|
<!--+
|
|
|end bottomstrip
|
|
+-->
|
|
</div>
|
|
</body>
|
|
</html>
|