Karl Wright 2017-09-26 03:20:41 -04:00
commit ec5704e4af
40 changed files with 1161 additions and 994 deletions

View File

@ -53,5 +53,18 @@
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -32,6 +32,9 @@ New Features
abstractions: SpatialContextFactory, ShapeFactory, BinaryCodec, DistanceCalculator.
(Ignacio Vera, David Smiley)
* LUCENE-7973: Update dictionary version for Ukrainian analyzer to 3.9.0 (Andriy
Rysin via Dawid Weiss)
Optimizations
* LUCENE-7905: Optimize how OrdinalMap (used by

View File

@ -809,7 +809,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
/**
* Mapping from old ordinal to new ordinals, used when merging indexes
* wit separate taxonomies.
* with separate taxonomies.
* <p>
* addToTaxonomies() merges one or more taxonomies into the given taxonomy
* (this). An OrdinalMap is filled for each of the added taxonomies,

View File

@ -278,7 +278,7 @@ org.slf4j.version = 1.7.7
/org.tukaani/xz = 1.5
/rome/rome = 1.0
ua.net.nlp.morfologik-ukrainian-search.version = 3.7.6
ua.net.nlp.morfologik-ukrainian-search.version = 3.9.0
/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
/xerces/xercesImpl = 2.9.1

View File

@ -1 +0,0 @@
8d2c4bf006f59227bcba8885b4602b3a8b5bd799

View File

@ -0,0 +1 @@
9790241f6d0d6cefd48e0d2532bd59097fb0340a

View File

@ -145,6 +145,9 @@ Bug Fixes
* SOLR-11363: JSON Facet API: repeated values in a numeric points field with docValues enabled
were double counted. (Hossman, yonik)
* SOLR-11297: Message "Lock held by this virtual machine" during startup. Solr is trying to start some cores twice.
(Luiz Armesto, Shawn Heisey, Erick Erickson)
Optimizations
----------------------

View File

@ -637,9 +637,10 @@ public class CoreContainer {
if (zkSys.getZkController() != null) {
zkSys.getZkController().throwErrorIfReplicaReplaced(cd);
}
solrCores.waitAddPendingCoreOps(cd.getName());
core = createFromDescriptor(cd, false, false);
} finally {
solrCores.removeFromPendingOps(cd.getName());
if (asyncSolrCoreLoad) {
solrCores.markCoreAsNotLoading(cd);
}
@ -935,7 +936,13 @@ public class CoreContainer {
// first and clean it up if there's an error.
coresLocator.create(this, cd);
SolrCore core = createFromDescriptor(cd, true, newCollection);
SolrCore core = null;
try {
solrCores.waitAddPendingCoreOps(cd.getName());
core = createFromDescriptor(cd, true, newCollection);
} finally {
solrCores.removeFromPendingOps(cd.getName());
}
return core;
} catch (Exception ex) {
@ -970,7 +977,6 @@ public class CoreContainer {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
}
}
/**
@ -979,6 +985,26 @@ public class CoreContainer {
* @param dcore a core descriptor
* @param publishState publish core state to the cluster if true
*
* WARNING: Any call to this method should be surrounded by a try/finally block
* that calls solrCores.waitAddPendingCoreOps(...) and solrCores.removeFromPendingOps(...)
*
* <pre>
* <code>
* try {
* solrCores.waitAddPendingCoreOps(dcore.getName());
* createFromDescriptor(...);
* } finally {
* solrCores.removeFromPendingOps(dcore.getName());
* }
* </code>
* </pre>
*
* Trying to put the waitAddPending... in this method results in Bad Things Happening due to race conditions.
* getCore() depends on getting the core returned _if_ it's in the pending list due to some other thread opening it.
* If the core is not in the pending list and not loaded, then getCore() calls this method. Any code that checked
* whether the core was loaded _or_ in pending ops and then, based on that result, called createFromDescriptor would
* introduce a race condition; see getCore() for the place where this would be a problem.
*
* @return the newly created core
*/
private SolrCore createFromDescriptor(CoreDescriptor dcore, boolean publishState, boolean newCollection) {
@ -1258,7 +1284,12 @@ public class CoreContainer {
} else {
CoreLoadFailure clf = coreInitFailures.get(name);
if (clf != null) {
createFromDescriptor(clf.cd, true, false);
try {
solrCores.waitAddPendingCoreOps(clf.cd.getName());
createFromDescriptor(clf.cd, true, false);
} finally {
solrCores.removeFromPendingOps(clf.cd.getName());
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No such core: " + name );
}
@ -1431,7 +1462,8 @@ public class CoreContainer {
// TestLazyCores
if (desc == null || zkSys.getZkController() != null) return null;
// This will put an entry in pending core ops if the core isn't loaded
// This will put an entry in pending core ops if the core isn't loaded. Here's where moving the
// waitAddPendingCoreOps to createFromDescriptor would introduce a race condition.
core = solrCores.waitAddPendingCoreOps(name);
if (isShutDown) return null; // We're quitting, so stop. This needs to be after the wait above since we may come off

View File

@ -301,12 +301,12 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("movingMedian", MovingMedianEvaluator.class)
.withFunctionName("monteCarlo", MonteCarloEvaluator.class)
.withFunctionName("constantDistribution", ConstantDistributionEvaluator.class)
.withFunctionName("weibullDistribution", WeibullDistributionEvaluator.class)
.withFunctionName("mean", MeanEvaluator.class)
// Boolean Stream Evaluators
.withFunctionName("and", AndEvaluator.class)
.withFunctionName("and", AndEvaluator.class)
.withFunctionName("eor", ExclusiveOrEvaluator.class)
.withFunctionName("eq", EqualToEvaluator.class)
.withFunctionName("gt", GreaterThanEvaluator.class)

View File

@ -51,8 +51,8 @@ An example `security.json` showing both sections is shown below to show how thes
There are several things defined in this file:
<1> Basic authentication and rule-based authorization plugins are enabled.
<2> A user called 'solr', with a password `'SolrRocks'` has been defined.
<3> The parameter `"blockUnknown":true` means that unauthenticated requests are not allowed to pass through.
<2> The parameter `"blockUnknown":true` means that unauthenticated requests are not allowed to pass through.
<3> A user called 'solr', with a password `'SolrRocks'` has been defined.
<4> The 'admin' role has been defined, and it has permission to edit security settings.
<5> The 'solr' user has been assigned the 'admin' role.

View File

@ -33,24 +33,24 @@ The `CollapsingQParser` is really a _post filter_ that provides more performant
The CollapsingQParser accepts the following local parameters:
field::
`field`::
The field that is being collapsed on. The field must be a single-valued String, Int or Float field.
min or max::
`min` or `max`::
Selects the group head document for each group based on which document has the min or max value of the specified numeric field or <<function-queries.adoc#function-queries,function query>>.
+
At most only one of the `min`, `max`, or `sort` (see below) parameters may be specified.
+
If none are specified, the group head document of each group will be selected based on the highest scoring document in that group. The default is none.
sort::
`sort`::
Selects the group head document for each group based on which document comes first according to the specified <<common-query-parameters.adoc#sort-parameter,sort string>>.
+
At most only one of the `min`, `max`, (see above) or `sort` parameters may be specified.
+
If none are specified, the group head document of each group will be selected based on the highest scoring document in that group. The default is none.
nullPolicy::
`nullPolicy`::
There are three available null policies:
+
* `ignore`: removes documents with a null value in the collapse field. This is the default.
@ -59,14 +59,14 @@ There are three available null policies:
+
The default is `ignore`.
hint::
`hint`::
Currently there is only one hint available: `top_fc`, which stands for top level FieldCache.
+
The `top_fc` hint is only available when collapsing on String fields. `top_fc` usually provides the best query time speed but takes the longest to warm on startup or following a commit. `top_fc` will also result in having the collapsed field cached in memory twice if it's used for faceting or sorting. For very high cardinality (high distinct count) fields, `top_fc` may not fare so well.
+
The default is none.
size::
`size`::
Sets the initial size of the collapse data structures when collapsing on a *numeric field only*.
+
The data structures used for collapsing grow dynamically when collapsing on numeric fields. Setting the size above the number of results expected in the result set will eliminate the resizing cost.
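Taken together, these local parameters are combined inside a single `{!collapse ...}` filter query. A hedged sketch (the field names `group_s` and `price_f` are hypothetical, not from any shipped configset):

[source,text]
----
fq={!collapse field=group_s max=price_f nullPolicy=ignore}
----

This collapses the result set by `group_s`, keeping the document with the highest `price_f` in each group and dropping documents that have no value in the collapse field.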
@ -146,14 +146,14 @@ Inside the expanded section there is a _map_ with each group head pointing to th
The ExpandComponent has the following parameters:
expand.sort::
`expand.sort`::
Orders the documents within the expanded groups. The default is `score desc`.
expand.rows::
`expand.rows`::
The number of rows to display in each group. The default is 5 rows.
expand.q::
`expand.q`::
Overrides the main query (`q`), determines which documents to include in the main group. The default is to use the main query.
expand.fq::
`expand.fq`::
Overrides main filter queries (`fq`), determines which documents to include in the main group. The default is to use the main filter queries.
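As a hedged illustration of how these parameters work together with the collapse filter above (the field names are hypothetical):

[source,text]
----
q=solr&fq={!collapse field=group_s}&expand=true&expand.rows=10&expand.sort=price_f asc
----

Here `expand=true` turns the component on, and each collapsed group in the main result set is expanded to at most 10 documents ordered by `price_f`.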

View File

@ -43,9 +43,9 @@ keytool -genkeypair -alias solr-ssl -keyalg RSA -keysize 2048 -keypass secret -s
The above command will create a keystore file named `solr-ssl.keystore.jks` in the current directory.
=== Convert the Certificate and Key to PEM Format for Use with cURL
=== Convert the Certificate and Key to PEM Format for Use with curl
cURL isn't capable of using JKS formatted keystores, so the JKS keystore needs to be converted to PEM format, which cURL understands.
curl isn't capable of using JKS formatted keystores, so the JKS keystore needs to be converted to PEM format, which curl understands.
First convert the JKS keystore into PKCS12 format using `keytool`:
@ -63,7 +63,7 @@ Next convert the PKCS12 format keystore, including both the certificate and the
openssl pkcs12 -in solr-ssl.keystore.p12 -out solr-ssl.pem
----
If you want to use cURL on OS X Yosemite (10.10), you'll need to create a certificate-only version of the PEM format, as follows:
If you want to use curl on OS X Yosemite (10.10), you'll need to create a certificate-only version of the PEM format, as follows:
[source,bash]
----
@ -230,16 +230,16 @@ bin\solr.cmd -cloud -s cloud\node2 -z localhost:2181 -p 7574
[IMPORTANT]
====
cURL on OS X Mavericks (10.9) has degraded SSL support. For more information and workarounds to allow one-way SSL, see http://curl.haxx.se/mail/archive-2013-10/0036.html. cURL on OS X Yosemite (10.10) is improved - 2-way SSL is possible - see http://curl.haxx.se/mail/archive-2014-10/0053.html .
curl on OS X Mavericks (10.9) has degraded SSL support. For more information and workarounds to allow one-way SSL, see http://curl.haxx.se/mail/archive-2013-10/0036.html. curl on OS X Yosemite (10.10) is improved - 2-way SSL is possible - see http://curl.haxx.se/mail/archive-2014-10/0053.html .
The cURL commands in the following sections will not work with the system `curl` on OS X Yosemite (10.10). Instead, the certificate supplied with the `-E` param must be in PKCS12 format, and the file supplied with the `--cacert` param must contain only the CA certificate, and no key (see <<Convert the Certificate and Key to PEM Format for Use with cURL,above>> for instructions on creating this file):
The curl commands in the following sections will not work with the system `curl` on OS X Yosemite (10.10). Instead, the certificate supplied with the `-E` param must be in PKCS12 format, and the file supplied with the `--cacert` param must contain only the CA certificate, and no key (see <<Convert the Certificate and Key to PEM Format for Use with curl,above>> for instructions on creating this file):
[source,bash]
curl -E solr-ssl.keystore.p12:secret --cacert solr-ssl.cacert.pem ...
====
NOTE: If your operating system does not include cURL, you can download binaries here: http://curl.haxx.se/download.html
NOTE: If your operating system does not include curl, you can download binaries here: http://curl.haxx.se/download.html
=== Create a SolrCloud Collection using bin/solr
@ -259,7 +259,7 @@ bin\solr.cmd create -c mycollection -shards 2
The `create` action will pass the `SOLR_SSL_*` properties set in your include file to the SolrJ code used to create the collection.
=== Retrieve SolrCloud Cluster Status using cURL
=== Retrieve SolrCloud Cluster Status using curl
To get the resulting cluster status (again, if you have not enabled client authentication, remove the `-E solr-ssl.pem:secret` option):
@ -315,9 +315,9 @@ cd example/exampledocs
java -Djavax.net.ssl.keyStorePassword=secret -Djavax.net.ssl.keyStore=../../server/etc/solr-ssl.keystore.jks -Djavax.net.ssl.trustStore=../../server/etc/solr-ssl.keystore.jks -Djavax.net.ssl.trustStorePassword=secret -Durl=https://localhost:8984/solr/mycollection/update -jar post.jar *.xml
----
=== Query Using cURL
=== Query Using curl
Use cURL to query the SolrCloud collection created above, from a directory containing the PEM formatted certificate and key created above (e.g. `example/etc/`) - if you have not enabled client authentication (system property `-Djetty.ssl.clientAuth=true)`, then you can remove the `-E solr-ssl.pem:secret` option:
Use curl to query the SolrCloud collection created above, from a directory containing the PEM formatted certificate and key created above (e.g. `example/etc/`) - if you have not enabled client authentication (system property `-Djetty.ssl.clientAuth=true`), then you can remove the `-E solr-ssl.pem:secret` option:
[source,bash]
----

View File

@ -259,7 +259,7 @@ Using the "`bin/solr -e techproducts`" example, A query URL like this one will r
http://localhost:8983/solr/techproducts/select?q=*:*&facet.pivot=cat,popularity,inStock
&facet.pivot=popularity,cat&facet=true&facet.field=cat&facet.limit=5&rows=0&facet.pivot.mincount=2
----
+
[source,json]
----
{ "facet_counts":{
@ -551,7 +551,7 @@ For example:
The initial and end values cannot be empty.
If the interval needs to be unbounded, the special character `*` can be used for both, start and end limit. When using this special character, the start syntax options (`(` anAd `[`), and end syntax options (`)` and `]`) will be treated the same. `[*,*]` will include all documents with a value in the field.
If the interval needs to be unbounded, the special character `\*` can be used for both the start and end limits. When using this special character, the start syntax options (`(` and `[`), and end syntax options (`)` and `]`) will be treated the same. `[*,*]` will include all documents with a value in the field.
The interval limits may be strings but there is no need to add quotes. All the text until the comma will be treated as the start limit, and the text after that will be the end limit. For example: `[Buenos Aires,New York]`. Keep in mind that a string-like comparison will be done to match documents in string intervals (case-sensitive). The comparator can't be changed.
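For example, a hedged sketch combining a bounded and an unbounded interval on a hypothetical numeric `price` field:

[source,text]
----
&facet=true&facet.interval=price&facet.interval.set=[0,100]&facet.interval.set=(100,*]
----

The first set counts documents with `price` from 0 to 100 inclusive; the second counts everything above 100.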

View File

@ -20,8 +20,12 @@
Solr ships with many out-of-the-box RequestHandlers, which are called implicit because they are not configured in `solrconfig.xml`.
These handlers have pre-defined default parameters, known as _paramsets_, which can be modified if necessary.
== List of Implicitly Available Endpoints
// TODO 7.1 - this doesn't look great in the PDF, redesign the presentation
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="15,20,15,50",options="header"]
@ -57,17 +61,20 @@ Solr ships with many out-of-the-box RequestHandlers, which are called implicit b
== How to View the Configuration
You can see configuration for all request handlers, including the implicit request handlers, via the <<config-api.adoc#config-api,Config API>>. E.g. for the `gettingstarted` collection:
You can see configuration for all request handlers, including the implicit request handlers, via the <<config-api.adoc#config-api,Config API>>. For the `gettingstarted` collection:
`curl http://localhost:8983/solr/gettingstarted/config/requestHandler`
[source,text]
curl http://localhost:8983/solr/gettingstarted/config/requestHandler
To restrict the results to the configuration for a particular request handler, use the `componentName` request param. E.g. to see just the configuration for the `/export` request handler:
To restrict the results to the configuration for a particular request handler, use the `componentName` request parameter. To see just the configuration for the `/export` request handler:
`curl "http://localhost:8983/solr/gettingstarted/config/requestHandler?componentName=/export"`
[source,text]
curl "http://localhost:8983/solr/gettingstarted/config/requestHandler?componentName=/export"
To include the expanded paramset in the response, as well as the effective parameters from merging the paramset params with the built-in params, use the `expandParams` request param. E.g. for the `/export` request handler:
To include the expanded paramset in the response, as well as the effective parameters from merging the paramset parameters with the built-in parameters, use the `expandParams` request parameter. For the `/export` request handler, you can make a request like this:
`curl "http://localhost:8983/solr/gettingstarted/config/requestHandler?componentName=/export&expandParams=true"`
[source,text]
curl "http://localhost:8983/solr/gettingstarted/config/requestHandler?componentName=/export&expandParams=true"
== How to Edit the Configuration

View File

@ -54,14 +54,16 @@ See the section <<solrcloud-autoscaling.adoc#solrcloud-autoscaling,SolrCloud Aut
=== Other Features & Enhancements
// TODO 7.1 - update link to docs when complete
* The Analytics Component has been refactored.
** The documentation for this component is in progress; until it is available, please refer to https://issues.apache.org/jira/browse/SOLR-11144[SOLR-11144] for more details.
* There were several other new features released in earlier 6.x releases, which you may have missed:
** <<learning-to-rank.adoc#learning-to-rank,Learning to Rank>>
** <<highlighting.adoc#the-unified-highlighter,Unified Highlighter>>
** <<metrics-reporting.adoc#metrics-reporting,Metrics API>>. See also information about related deprecations in the section <<JMX Support and MBeans>> below.
** <<other-parsers.adoc#payload-query-parsers,Payload queries>>
** <<stream-evaluators.adoc#stream-evaluators,Streaming Evaluators>>
** <<stream-evaluator-reference.adoc#stream-evaluator-reference,Streaming Evaluators>>
** <<v2-api.adoc#v2-api,/v2 API>>
** <<graph-traversal.adoc#graph-traversal,Graph streaming expressions>>

View File

@ -181,10 +181,10 @@ http://localhost:8983/solr/admin/cores?action=LISTSNAPSHOTS&core=techproducts&co
The list snapshot request parameters are:
core::
`core`::
The name of the core whose snapshots we want to list.
async::
`async`::
Request ID to track this action which will be processed asynchronously.
=== Delete Snapshot API
@ -210,7 +210,6 @@ The name of the core whose snapshot we want to delete
`async`::
Request ID to track this action which will be processed asynchronously
== Backup/Restore Storage Repositories
Solr provides interfaces to plug different storage systems for backing up and restoring. For example, you can have a Solr cluster running on a local filesystem like EXT3 but back up the indexes to an HDFS filesystem, or vice versa.
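For instance, a hedged sketch of a Collections API backup call that targets a named repository; the repository name `hdfs` and the location are placeholders that would first have to be defined in `solr.xml`:

[source,bash]
----
curl "http://localhost:8983/solr/admin/collections?action=BACKUP&name=myBackup&collection=techproducts&repository=hdfs&location=/backups"
----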

View File

@ -112,7 +112,7 @@ Now, lets add a new word to the English stop word list using an HTTP PUT:
curl -X PUT -H 'Content-type:application/json' --data-binary '["foo"]' "http://localhost:8983/solr/techproducts/schema/analysis/stopwords/english"
----
Here were using cURL to PUT a JSON list containing a single word “foo” to the managed English stop words set. Solr will return 200 if the request was successful. You can also put multiple words in a single PUT request.
Here we're using curl to PUT a JSON list containing a single word “foo” to the managed English stop words set. Solr will return 200 if the request was successful. You can also put multiple words in a single PUT request.
You can test to see if a specific word exists by sending a GET request for that word as a child resource of the set, such as:
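A hedged sketch of such a check for the word added above:

[source,bash]
----
curl "http://localhost:8983/solr/techproducts/schema/analysis/stopwords/english/foo"
----

Solr returns the entry if the word is in the managed set, and a 404 if it is not.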

View File

@ -1,7 +1,7 @@
= Managing Solr
:page-shortname: managing-solr
:page-permalink: managing-solr.html
:page-children: securing-solr, running-solr-on-hdfs, making-and-restoring-backups, configuring-logging, using-jmx-with-solr, mbean-request-handler, performance-statistics-reference, metrics-reporting, v2-api
:page-children: securing-solr, running-solr-on-hdfs, making-and-restoring-backups, configuring-logging, metrics-reporting, using-jmx-with-solr, mbean-request-handler, performance-statistics-reference, v2-api
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information

View File

@ -36,6 +36,7 @@ The output format. This operates the same as the <<response-writers.adoc#respons
== MBeanRequestHandler Examples
// TODO 7.1 - replace with link to tutorial
The following examples assume you are running Solr's `techproducts` example configuration:
[source,bash]
@ -45,16 +46,20 @@ bin/solr start -e techproducts
To return information about the CACHE category only:
`\http://localhost:8983/solr/techproducts/admin/mbeans?cat=CACHE`
[source,text]
http://localhost:8983/solr/techproducts/admin/mbeans?cat=CACHE
To return information and statistics about the CACHE category only, formatted in XML:
`\http://localhost:8983/solr/techproducts/admin/mbeans?stats=true&cat=CACHE&wt=xml`
[source,text]
http://localhost:8983/solr/techproducts/admin/mbeans?stats=true&cat=CACHE&wt=xml
To return information for everything, and statistics for everything except the `fieldCache`:
`\http://localhost:8983/solr/techproducts/admin/mbeans?stats=true&f.fieldCache.stats=false`
[source,text]
http://localhost:8983/solr/techproducts/admin/mbeans?stats=true&f.fieldCache.stats=false
To return information and statistics for the `fieldCache` only:
`\http://localhost:8983/solr/techproducts/admin/mbeans?key=fieldCache&stats=true`
[source,text]
http://localhost:8983/solr/techproducts/admin/mbeans?key=fieldCache&stats=true

View File

@ -64,6 +64,14 @@ If your code block will include line breaks, put 4 hyphens (`----`) before and a
More info: http://asciidoctor.org/docs/user-manual/#source-code-blocks
==== Source Block Syntax Highlighting
The PDF and HTML outputs use Pygments to add syntax highlighting to code examples. This is done by adding the language of the code block after the `source`, as shown in the above example source block (`xml` in that case).
Pygments has a long selection of lexers available. You can see the full list at http://pygments.org/docs/lexers. Use one of the valid short names to get syntax highlighting for that language.
Ideally, we will have an appropriate lexer to use for all source blocks, but that's not always possible. When in doubt, choose `text`, or leave it blank.
=== Block Titles
Titles can be added to most blocks (images, source blocks, tables, etc.) by simply prefacing the title with a period (`.`). For example, to add a title to the source block example above:
@ -197,7 +205,7 @@ For more information, see the section <<schema-api.adoc#modify-the-schema,Modify
More info: http://asciidoctor.org/docs/user-manual/#inter-document-cross-references
== Lists
== Ordered and Unordered Lists
AsciiDoc supports three types of lists:

View File

@ -22,7 +22,7 @@ Near Real Time (NRT) search means that documents are available for search almost
This allows additions and updates to documents to be seen in 'near' real time. Solr does not block updates while a commit is in progress. Nor does it wait for background merges to complete before opening a new search of indexes and returning.
With NRT, you can modify a `commit` command to be a *soft commit*, which avoids parts of a standard commit that can be costly. You will still want to do standard commits to ensure that documents are in stable storage, but *soft commits* let you see a very near real time view of the index in the meantime.
With NRT, you can modify a `commit` command to be a *soft commit*, which avoids parts of a standard commit that can be costly. You will still want to do standard commits to ensure that documents are in stable storage, but soft commits let you see a very near real time view of the index in the meantime.
However, pay special attention to cache and autowarm settings as they can have a significant impact on NRT performance.
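As a concrete, hedged example (assuming the `techproducts` collection used elsewhere in this guide), both kinds of commit can be requested explicitly through the update handler:

[source,bash]
----
# Soft commit: make recent changes searchable without fsync-ing index files
curl "http://localhost:8983/solr/techproducts/update?softCommit=true"

# Hard commit: flush index files to stable storage and roll the transaction log
curl "http://localhost:8983/solr/techproducts/update?commit=true"
----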
@ -30,9 +30,9 @@ However, pay special attention to cache and autowarm settings as they can have a
A commit operation makes index changes visible to new search requests. A *hard commit* uses the transaction log to get the id of the latest document changes, and also calls `fsync` on the index files to ensure they have been flushed to stable storage and no data loss will result from a power failure. The current transaction log is closed and a new one is opened. See the "transaction log" discussion below for data loss issues.
A *soft commit* is much faster since it only makes index changes visible and does not `fsync` index files, or write a new index descriptor or start a new transaction log. Search collections that have NRT requirements (that want index changes to be quickly visible to searches) will want to soft commit often but hard commit less frequently. A softCommit may be "less expensive", but it is not free, since it can slow throughput. See the "transaction log" discussion below for data loss issues.
A soft commit is much faster since it only makes index changes visible and does not `fsync` index files, or write a new index descriptor or start a new transaction log. Search collections that have NRT requirements (that want index changes to be quickly visible to searches) will want to soft commit often but hard commit less frequently. A softCommit may be "less expensive", but it is not free, since it can slow throughput. See the "transaction log" discussion below for data loss issues.
An *optimize* is like a *hard commit* except that it forces all of the index segments to be merged into a single segment first. Depending on the use, this operation should be performed infrequently (e.g., nightly), if at all, since it involves reading and re-writing the entire index. Segments are normally merged over time anyway (as determined by the merge policy), and optimize just forces these merges to occur immediately.
An *optimize* is like a hard commit except that it forces all of the index segments to be merged into a single segment first. Depending on the use, this operation should be performed infrequently (e.g., nightly), if at all, since it involves reading and re-writing the entire index. Segments are normally merged over time anyway (as determined by the merge policy), and optimize just forces these merges to occur immediately.
Soft commit takes two parameters: `maxDocs` and `maxTime`.
@ -46,7 +46,7 @@ Use `maxDocs` and `maxTime` judiciously to fine-tune your commit strategies.
=== Transaction Logs (tlogs)
Transaction logs are a "rolling window" of at least the last `N` (default 100) documents indexed. Tlogs are configured in solrconfig.xml, including the value of `N`. The current transaction log is closed and a new one opened each time any variety of hard commit occurs. Soft commits have no effect on the transaction log.
Transaction logs are a "rolling window" of at least the last `N` (default 100) documents indexed. Tlogs are configured in `solrconfig.xml`, including the value of `N`. The current transaction log is closed and a new one opened each time any variety of hard commit occurs. Soft commits have no effect on the transaction log.
When tlogs are enabled, documents being added to the index are written to the tlog before the indexing call returns to the client. In the event of an un-graceful shutdown (power loss, JVM crash, `kill -9`, etc.), any documents written to the tlog that was open when Solr stopped are replayed on startup.

View File

@ -79,7 +79,7 @@ Other types of data such as errors and timeouts are also provided. These are ava
The table below shows the metric names and attributes to request:
[cols="25,75",options="header"]
[cols="30,70",options="header"]
|===
|Metric name | Description
|`QUERY./select.errors`

View File

@ -21,7 +21,7 @@
Query Re-Ranking allows you to run a simple query (A) for matching documents and then re-rank the top N documents using the scores from a more complex query (B).
Since the more costly ranking from query B is only applied to the top _N_ documents it will have less impact on performance then just using the complex query B by itself. The trade off is that documents which score very low using the simple query A may not be considered during the re-ranking phase, even if they would score very highly using query B.
Since the more costly ranking from query B is only applied to the top _N_ documents, it will have less impact on performance than just using the complex query B by itself. The trade-off is that documents which score very low using the simple query A may not be considered during the re-ranking phase, even if they would score very highly using query B.
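For example, a hedged sketch of a re-rank request using the `rerank` query parser, where the simple query A is `q` and the more complex query B is supplied as `rqq` (both queries here are placeholders):

[source,text]
----
q=greetings&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=3}&rqq=(hi hello hey hiya)
----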
== Specifying a Ranking Query

View File

@ -94,9 +94,6 @@ This parameter controls the output format of NamedLists, where order is more imp
The `json.nl` parameter takes the following values:
|json.nl Parameter setting |Example output for `NamedList("a"=1, "bar"="foo", null=3, null=null)` |Description
`flat`::
The default. NamedList is represented as a flat array, alternating names and values.
+
@ -107,17 +104,17 @@ NamedList is represented as a JSON object. Although this is the simplest mapping
+
With input of `NamedList("a"=1, "bar"="foo", null=3, null=null)`, the output would be `{"a":1, "bar":"foo", "":3, "":null}`.
arrarr::
`arrarr`::
NamedList is represented as an array of two element arrays.
+
With input of `NamedList("a"=1, "bar"="foo", null=3, null=null)`, the output would be `[["a",1], ["bar","foo"], [null,3], [null,null]]`.
arrmap::
`arrmap`::
NamedList is represented as an array of JSON objects.
+
With input of `NamedList("a"=1, "bar"="foo", null=3, null=null)`, the output would be `[{"a":1}, {"b":2}, 3, null]`.
arrntv::
`arrntv`::
NamedList is represented as an array of Name Type Value JSON objects.
+
With input of `NamedList("a"=1, "bar"="foo", null=3, null=null)`, the output would be `[{"name":"a","type":"int","value":1}, {"name":"bar","type":"str","value":"foo"}, {"name":null,"type":"int","value":3}, {"name":null,"type":"null","value":null}]`.
@ -138,29 +135,26 @@ Note that the XSLT Response Writer can be used to convert the XML produced by th
The behavior of the XML Response Writer can be driven by the following query parameters.
=== The version Parameter
`version`::
The `version` parameter determines the XML protocol used in the response. Clients are strongly encouraged to _always_ specify the protocol version, so as to ensure that the format of the response they receive does not change unexpectedly if the Solr server is upgraded and a new default format is introduced.
+
The only currently supported version value is `2.2`. The format of the `responseHeader` changed to use the same `<lst>` structure as the rest of the response.
+
The default value is the latest supported.
=== stylesheet Parameter
`stylesheet`::
The `stylesheet` parameter can be used to direct Solr to include a `<?xml-stylesheet type="text/xsl" href="..."?>` declaration in the XML response it returns.
+
The default behavior is not to return any stylesheet declaration at all.
+
[IMPORTANT]
====
Use of the `stylesheet` parameter is discouraged, as there is currently no way to specify external stylesheets, and no stylesheets are provided in the Solr distributions. This is a legacy parameter, which may be developed further in a future release.
====
=== indent Parameter
`indent`::
If the `indent` parameter is used, and has a non-blank value, then Solr will make some attempts at indenting its XML response to make it more readable by humans.
+
The default behavior is not to indent.
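Putting these parameters together, a hedged example request that selects the XML response writer (the collection name is a placeholder):

[source,text]
----
http://localhost:8983/solr/techproducts/select?q=*:*&wt=xml&version=2.2&indent=true
----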
== XSLT Response Writer

View File

@ -40,25 +40,26 @@ This example `security.json` shows how the <<basic-authentication-plugin.adoc#ba
----
{
"authentication":{
"class":"solr.BasicAuthPlugin",
"blockUnknown": true,
"credentials":{"solr":"IV0EHq1OnNrj6gvRCwvFwTrZ1+z1oBbnQdiVC3otuq0= Ndd7LKvVBAaZIF0QAVi1ekCfAJXr1GGfLtRUXhgrF8c="}
"class":"solr.BasicAuthPlugin", <1>
"blockUnknown": true, <2>
"credentials":{"solr":"IV0EHq1OnNrj6gvRCwvFwTrZ1+z1oBbnQdiVC3otuq0= Ndd7LKvVBAaZIF0QAVi1ekCfAJXr1GGfLtRUXhgrF8c="} <3>
},
"authorization":{
"class":"solr.RuleBasedAuthorizationPlugin",
"class":"solr.RuleBasedAuthorizationPlugin", <4>
"permissions":[{"name":"security-edit",
"role":"admin"}],
"user-role":{"solr":"admin"}
"role":"admin"}], <5>
"user-role":{"solr":"admin"} <6>
}}
----
There are several things defined in this example:
* Basic authentication and rule-based authorization plugins are enabled.
<1> The basic authentication plugin is enabled.
<2> All requests without credentials will be rejected with a 401 error. Set `'blockUnknown'` to false (or remove it altogether) if you wish to let unauthenticated requests go through. However, if a particular resource is protected by a rule, they are rejected anyway with a 401 error.
<3> A user called 'solr', with a password `'SolrRocks'` has been defined.
* A user called 'solr', with a password has been defined.
* All requests w/o credentials will be rejected with a 401 error. Set `'blockUnknown'` to false (or remove it altogether) if you wish to let unauthenticated requests to go through. However, if a particular resource is protected by a rule, they are rejected anyway with a 401 error.
* The 'admin' role has been defined, and it has permission to edit security settings.
* The 'solr' user has been defined to the 'admin' role.
<4> The rule-based authorization plugin is enabled.
<5> The 'admin' role has been defined, and it has permission to edit security settings.
<6> The 'solr' user has been assigned the 'admin' role.
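With this `security.json` in place, unauthenticated requests are rejected with a 401, while requests that authenticate as the 'solr' user succeed. A minimal hedged check, using the credentials defined above:

[source,bash]
----
curl -u solr:SolrRocks "http://localhost:8983/solr/admin/authentication"
----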
== Permission Attributes

View File

@ -136,7 +136,8 @@ Pass the location of HDFS client configuration files - needed for HDFS HA for ex
Hadoop can be configured to use the Kerberos protocol to verify user identity when trying to access core services like HDFS. If your HDFS directories are protected using Kerberos, then you need to configure Solr's HdfsDirectoryFactory to authenticate using Kerberos in order to read and write to HDFS. To enable Kerberos authentication from Solr, you need to set the following parameters:
`solr.hdfs.security.kerberos.enabled`:: false |Set to `true` to enable Kerberos authentication. The default is `false`.
`solr.hdfs.security.kerberos.enabled`::
Set to `true` to enable Kerberos authentication. The default is `false`.
`solr.hdfs.security.kerberos.keytabfile`::
A keytab file contains pairs of Kerberos principals and encrypted keys which allows for password-less authentication when Solr attempts to authenticate with secure Hadoop.
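One hedged way to supply these parameters is as JVM system properties at startup, assuming your `solrconfig.xml` directory factory references them via `${...}` property substitution; the keytab path and principal below are placeholders:

[source,bash]
----
bin/solr start -Dsolr.hdfs.security.kerberos.enabled=true \
  -Dsolr.hdfs.security.kerberos.keytabfile=/etc/security/keytabs/solr.keytab \
  -Dsolr.hdfs.security.kerberos.principal=solr/solr-host.example.com@EXAMPLE.COM
----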

View File

@ -63,8 +63,7 @@ The client can specify '<<distributed-search-with-index-sharding.adoc#distribute
Example response with `partialResults` flag set to 'true':
*Solr Response with partialResults*
.Solr Response with partialResults
[source,json]
----
{

View File

@ -18,27 +18,27 @@
// specific language governing permissions and limitations
// under the License.
The Streaming Expression language includes a powerful statistical programing syntax with many of the
features of a functional programming language. The syntax includes *variables*, *data structures*
and a growing set of *mathematical functions*.
The Streaming Expression language includes a powerful statistical programming syntax with many of the features of a functional programming language.
Using the statistical programing syntax Solr's powerful *data retrieval*
capabilities can be combined with in-depth *statistical analysis*.
The syntax includes variables, data structures and a growing set of mathematical functions.
The *data retrieval* methods include:
Using the statistical programming syntax, Solr's powerful data retrieval
capabilities can be combined with in-depth statistical analysis.
The data retrieval methods include:
* SQL
* time series aggregation
* random sampling
* faceted aggregation
* KNN searches
* topic message queues
* K-Nearest Neighbor (KNN) searches
* `topic` message queues
* MapReduce (parallel relational algebra)
* JDBC calls to outside databases
* Graph Expressions
Once the data is retrieved, the statistical programming syntax can be used to create *arrays* from the data so it
can be *manipulated*, *transformed* and *analyzed*.
Once the data is retrieved, the statistical programming syntax can be used to create arrays from the data so it
can be manipulated, transformed and analyzed.
The statistical function library includes functions that perform:
@ -48,7 +48,7 @@ The statistical function library includes functions that perform:
* Moving averages
* Percentiles
* Simple regression and prediction
* ANOVA
* Analysis of variance (ANOVA)
* Histograms
* Convolution
* Euclidean distance
@ -56,34 +56,30 @@ The statistical function library includes functions that perform:
* Rank transformation
* Normalization transformation
* Sequences
* Array manipulation functions (creation, copying, length, scaling, reverse etc...)
* Array manipulation functions (creation, copying, length, scaling, reverse, etc.)
The statistical function library is backed by *Apache Commons Math*.
The statistical function library is backed by https://commons.apache.org/proper/commons-math/[Apache Commons Math library]. A full discussion of many of the math functions available to streaming expressions is available in the section <<stream-evaluator-reference.adoc#stream-evaluator-reference,Stream Evaluator Reference>>.
This document provides an overview of the how to apply the variables, data structures
and mathematical functions.
This document provides an overview of how to apply the variables, data structures and mathematical functions.
== /stream handler
NOTE: Like all streaming expressions, the statistical functions are run by Solr's `/stream` handler. For an overview of this handler, see the section <<streaming-expressions.adoc#streaming-expressions,Streaming Expressions>>.
Like all Streaming Expressions, the statistical functions can be run by Solr's /stream handler.
== Math Functions
== Math
Streaming expressions contain a suite of mathematical functions which can be called on their own or as part of a larger expression.
Streaming Expressions contain a suite of *mathematical functions* which can be called on
their own or as part of a larger expression.
Solr's `/stream` handler evaluates the mathematical expression and returns a result.
Solr's /stream handler evaluates the mathematical expression and returns a result.
For example sending the following expression to the /stream handler:
For example, if you send the following expression to the `/stream` handler:
[source,text]
----
add(1, 1)
----
Returns the following response:
You get the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -109,7 +105,7 @@ pow(10, add(1,1))
Returns the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -126,9 +122,7 @@ Returns the following response:
}
----
You can also perform math on a stream of Tuples.
For example:
You can also perform math on a stream of Tuples. For example:
[source,text]
----
@ -139,7 +133,7 @@ select(search(collection2, q="*:*", fl="price_f", sort="price_f desc", rows="3")
Returns the following response:
[source, text]
[source,json]
----
{
"result-set": {
@ -165,9 +159,13 @@ Returns the following response:
}
----
== Array (data structure)
== Data Structures
The first data structure we'll explore is the *array*.
Several types of data can be manipulated with the statistical programming syntax. The following sections explore <<Arrays,arrays>>, <<Tuples,tuples>>, and <<Lists,lists>>.
=== Arrays
The first data structure we'll explore is the array.
We can create an array with the `array` function:
@ -180,7 +178,7 @@ array(1, 2, 3)
Returns the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -201,7 +199,7 @@ Returns the following response:
}
----
We can nest arrays within arrays to form a *matrix*:
We can nest arrays within arrays to form a matrix:
[source,text]
----
@ -211,7 +209,7 @@ array(array(1, 2, 3),
Returns the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -239,9 +237,7 @@ Returns the following response:
}
----
We can manipulate arrays with functions.
For example we can reverse and array with the `rev` function:
We can manipulate arrays with functions. For example, we can reverse an array with the `rev` function:
[source,text]
----
@ -250,7 +246,7 @@ rev(array(1, 2, 3))
Returns the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -271,18 +267,16 @@ Returns the following response:
}
----
Arrays can also be built and returned by functions.
For example the sequence function:
Arrays can also be built and returned by functions. For example, the `sequence` function:
[source,text]
----
sequence(5,0,1)
----
This returns an array of size *5* starting from *0* with a stride of *1*.
This returns an array of size `5` starting from `0` with a stride of `1`.
[source,text]
[source,json]
----
{
"result-set": {
@ -305,11 +299,7 @@ This returns an array of size *5* starting from *0* with a stride of *1*.
}
----
We can perform math on an array.
For example we can scale an array with the `scale` function:
Expression:
We can perform math on an array. For example, we can scale an array with the `scale` function:
[source,text]
----
@ -318,7 +308,7 @@ scale(10, sequence(5,0,1))
Returns the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -341,9 +331,7 @@ Returns the following response:
}
----
We can perform *statistical analysis* on arrays.
For example we can correlate two sequences with the `corr` function:
We can perform statistical analysis on arrays. For example, we can correlate two sequences with the `corr` function:
[source,text]
----
@ -352,7 +340,7 @@ corr(sequence(5,1,1), sequence(5,10,10))
Returns the following response:
[source,text]
[source,json]
----
{
"result-set": {
@ -370,12 +358,11 @@ Returns the following response:
----
== Tuple (data structure)
=== Tuples
The *tuple* is the next data structure we'll explore.
The tuple is the next data structure we'll explore.
The `tuple` function returns a map of name/value pairs. A tuple is a very flexible data structure
that can hold values that are strings, numerics, arrays and lists of tuples.
The `tuple` function returns a map of name/value pairs. A tuple is a very flexible data structure that can hold values that are strings, numerics, arrays and lists of tuples.
A tuple can be used to return a complex result from a statistical expression.
@ -390,7 +377,7 @@ tuple(title="hello world",
Returns the following response:
----
[source,text]
[source,json]
----
{
"result-set": {
@ -419,12 +406,11 @@ Returns the following response:
}
----
== List (data structure)
=== Lists
Next we have the *list* data structure.
Next we have the list data structure.
The `list` function is a data structure that wraps Streaming Expressions and emits all the tuples from the wrapped
expressions as a single concatenated stream.
The `list` function is a data structure that wraps streaming expressions and emits all the tuples from the wrapped expressions as a single concatenated stream.
Below is an example of a list of tuples:
@ -436,7 +422,7 @@ list(tuple(id=1, data=array(1, 2, 3)),
Returns the following response:
[source,text]
[source,json]
----
{
@ -467,14 +453,12 @@ Returns the following response:
}
----
== Let (setting variables)
== Setting Variables with let
The `let` function sets *variables* and runs a Streaming Expression that references the variables. The `let` funtion can be used to
The `let` function sets variables and runs a streaming expression that references the variables. The `let` function can be used to
write small statistical programs.
A *variable* can be set to the output of any Streaming Expression.
Here is a very simple example:
A variable can be set to the output of any streaming expression. Here is a very simple example:
[source,text]
----
@ -483,7 +467,7 @@ let(a=random(collection2, q="*:*", rows="3", fl="price_f"),
tuple(sample1=a, sample2=b))
----
The `let` expression above is setting variables *a* and *b* to random
The `let` expression above is setting variables `a` and `b` to random
samples taken from collection2.
The `let` function then executes the `tuple` streaming expression
@ -491,7 +475,7 @@ which references the two variables.
Here is the output:
[source,text]
[source,json]
----
{
"result-set": {
@ -529,11 +513,11 @@ Here is the output:
}
----
== Creating arrays with `col` function
== Creating Arrays with col Function
The `col` function is used to move a column of numbers from a list of tuples into an `array`.
This is an important function because Streaming Expressions such as `sql`, `random` and `timeseries` return tuples,
but the statistical functions operate on arrays.
This is an important function because streaming expressions such as `sql`, `random` and `timeseries` return tuples, but the statistical functions operate on arrays.
Below is an example of the `col` function:
@ -546,20 +530,19 @@ let(a=random(collection2, q="*:*", rows="3", fl="price_f"),
tuple(sample1=c, sample2=d))
----
The example above is using the `col` function to create arrays from the tuples stored in
variables *a* and *b*.
The example above is using the `col` function to create arrays from the tuples stored in variables `a` and `b`.
Variable *c* contains an array of values from the *price_f* field,
taken from the tuples stored in variable *a*.
Variable `c` contains an array of values from the `price_f` field,
taken from the tuples stored in variable `a`.
Variable *d* contains an array of values from the *price_f* field,
taken from the tuples stored in variable *b*.
Variable `d` contains an array of values from the `price_f` field,
taken from the tuples stored in variable `b`.
Also notice inn that the response `tuple` executed by `let` is pointing to the arrays in variables *c* and *d*.
Also notice that the response `tuple` executed by `let` is pointing to the arrays in variables `c` and `d`.
The response shows the arrays:
[source,text]
[source,json]
----
{
@ -588,61 +571,60 @@ The response shows the arrays:
== Statistical Programming Example
We've covered how the *data structures*, *variables* and a few *statistical functions* work.
Let's dive into an example that puts these tools to use.
We've covered how the data structures, variables and a few statistical functions work. Let's dive into an example that puts these tools to use.
=== Use case
=== Use Case
We have an existing hotel in *cityA* that is very profitable.
We are contemplating opening up a new hotel in a different city.
We're considering 4 different cities: *cityB*, *cityC*, *cityD*, *cityE*.
We'd like to open a hotel in a city that has similar room rates to *cityA*.
We'd like to open a hotel in a city that has similar room rates to cityA.
How do we determine which of the 4 cities we're considering has room rates which are most similar to *cityA*?
How do we determine which of the 4 cities we're considering has room rates which are most similar to cityA?
=== The Data
We have a data set of un-aggregated hotel *bookings*. Each booking record has a rate and city.
We have a data set of un-aggregated hotel bookings. Each booking record has a rate and city.
=== Can we simply aggregate?
=== Can We Simply Aggregate?
One approach would be to aggregate the data from each city and compare the *mean* room rates. This approach will
One approach would be to aggregate the data from each city and compare the mean room rates. This approach will
give us some useful information, but the mean is a summary statistic which loses a significant amount of information
about the data. For example we don't have an understanding of how the distribution of room rates is impacting the
about the data. For example, we don't have an understanding of how the distribution of room rates is impacting the
mean.
The *median* room rate provides another interesting data point but it's still not the entire picture. It's sill just
The median room rate provides another interesting data point but it's still not the entire picture. It's still just
one point of reference.
Is there a way that we can compare the markets without losing valuable information in the data?
=== K Nearest Neighbor
==== K-Nearest Neighbor
The use case we're reasoning about can often be approached using a K Nearest Neighbor (knn) algorithm.
The use case we're reasoning about can often be approached using a K-Nearest Neighbor (knn) algorithm.
With knn we use a *distance* measure to compare vectors of data to find the k nearest neighbors to
With knn we use a distance measure to compare vectors of data to find the k nearest neighbors to
a specific vector.
=== Euclidean Distance
==== Euclidean Distance
The Streaming Expression statistical function library has a function called `distance`. The `distance` function
The streaming expression statistical function library has a function called `distance`. The `distance` function
computes the Euclidean distance between two vectors. This looks promising for comparing vectors of room rates.
=== Vectors
==== Vectors
But how do we create the vectors from our data set? Remember, we have un-aggregated room rates from each of the cities.
How can we vectorize the data so it can be compared using the `distance` function?
We have a Streaming Expression that can retrieve a *random sample* from each of the cities. The name of this
We have a streaming expression that can retrieve a random sample from each of the cities. The name of this
expression is `random`. So we could take a random sample of 1000 room rates from each of the five cities.
But random vectors of room rates are not comparable because the distance algorithm compares values at each index
in the vector. How can we make these vectors comparable?
We can make them comparable by *sorting* them. Then as the distance algorithm moves along the vectors it will be
We can make them comparable by sorting them. Then as the distance algorithm moves along the vectors it will be
comparing room rates from lowest to highest in both cities.
=== The code
=== The Code
[source,text]
----
@ -664,16 +646,16 @@ let(cityA=sort(random(bookings, q="city:cityA", rows="1000", fl="rate_d"), by="r
tuple(city=E, distance=distance(ratesA, ratesE)))))
----
==== The code explained
=== The Code Explained
The `let` expression sets variables first.
The first 5 variables (cityA, cityB, cityC, cityD, cityE) contain the random samples from the `bookings` collection.
the `random` function is pulling 1000 random samples from each city and including the `rate_d` field in the
The `random` function is pulling 1000 random samples from each city and including the `rate_d` field in the
tuples that are returned.
The `random` function is wrapped by a `sort` function which is sorting the tuples in
ascending order based on the rate_d field.
ascending order based on the `rate_d` field.
The next five variables (ratesA, ratesB, ratesC, ratesD, ratesE) contain the arrays of room rates for each
city. The `col` function is used to move the `rate_d` field from the random sample tuples

View File

@ -1,6 +1,6 @@
= Stream Decorator Reference
:page-shortname: stream-decorators
:page-permalink: stream-decorators.html
:page-shortname: stream-decorator-reference
:page-permalink: stream-decorator-reference.html
:page-tocclass: right
:page-toclevels: 1
// Licensed to the Apache Software Foundation (ASF) under one
@ -386,7 +386,7 @@ As you can see in the examples above, the `cartesianProduct` function does suppo
== classify
The `classify` function classifies tuples using a logistic regression text classification model. It was designed specifically to work with models trained using the <<stream-sources.adoc#train,train function>>. The `classify` function uses the <<stream-sources.adoc#model,model function>> to retrieve a stored model and then scores a stream of tuples using the model. The tuples read by the classifier must contain a text field that can be used for classification. The classify function uses a Lucene analyzer to extract the features from the text so the model can be applied. By default the `classify` function looks for the analyzer using the name of text field in the tuple. If the Solr schema on the worker node does not contain this field, the analyzer can be looked up in another field by specifying the `analyzerField` parameter.
The `classify` function classifies tuples using a logistic regression text classification model. It was designed specifically to work with models trained using the <<stream-source-reference.adoc#train,train function>>. The `classify` function uses the <<stream-source-reference.adoc#model,model function>> to retrieve a stored model and then scores a stream of tuples using the model. The tuples read by the classifier must contain a text field that can be used for classification. The classify function uses a Lucene analyzer to extract the features from the text so the model can be applied. By default the `classify` function looks for the analyzer using the name of text field in the tuple. If the Solr schema on the worker node does not contain this field, the analyzer can be looked up in another field by specifying the `analyzerField` parameter.
Each tuple that is classified is assigned two scores:
@ -500,7 +500,7 @@ daemon(id="uniqueId",
)
----
The sample code above shows a `daemon` function wrapping an `update` function, which is wrapping a `topic` function. When this expression is sent to the `/stream` handler, the `/stream` hander sees the `daemon` function and keeps it in memory where it will run at intervals. In this particular example, the `daemon` function will run the `update` function every second. The `update` function is wrapping a <<stream-sources.adoc#topic,`topic` function>>, which will stream tuples that match the `topic` function query in batches. Each subsequent call to the topic will return the next batch of tuples for the topic. The `update` function will send all the tuples matching the topic to another collection to be indexed. The `terminate` parameter tells the daemon to terminate when the `topic` function stops sending tuples.
The sample code above shows a `daemon` function wrapping an `update` function, which is wrapping a `topic` function. When this expression is sent to the `/stream` handler, the `/stream` handler sees the `daemon` function and keeps it in memory where it will run at intervals. In this particular example, the `daemon` function will run the `update` function every second. The `update` function is wrapping a <<stream-source-reference.adoc#topic,`topic` function>>, which will stream tuples that match the `topic` function query in batches. Each subsequent call to the topic will return the next batch of tuples for the topic. The `update` function will send all the tuples matching the topic to another collection to be indexed. The `terminate` parameter tells the daemon to terminate when the `topic` function stops sending tuples.
The effect of this is to push documents that match a specific query into another collection. Custom push functions can be plugged in that push documents out of Solr and into other systems, such as Kafka or an email system.
@ -643,7 +643,7 @@ daemon(id="myDaemon",
id="myTopic")))
----
In the example above a <<daemon,daemon>> wraps an executor**,** which wraps a <<stream-sources.adoc#topic,topic>> that is returning tuples with expressions to execute. When sent to the stream handler, the daemon will call the executor at intervals which will cause the executor to read from the topic and execute the expressions found in the `expr_s` field. The daemon will repeatedly call the executor until all the tuples that match the topic have been iterated, then it will terminate. This is the approach for executing batches of streaming expressions from a `topic` queue.
In the example above a <<daemon,daemon>> wraps an executor, which wraps a <<stream-source-reference.adoc#topic,topic>> that is returning tuples with expressions to execute. When sent to the stream handler, the daemon will call the executor at intervals which will cause the executor to read from the topic and execute the expressions found in the `expr_s` field. The daemon will repeatedly call the executor until all the tuples that match the topic have been iterated, then it will terminate. This is the approach for executing batches of streaming expressions from a `topic` queue.
== fetch
@ -676,9 +676,9 @@ The `having` expression wraps a stream and applies a boolean operation to each t
=== having Parameters
* `StreamExpression`: (Mandatory) The stream source for the having function.
* `booleanEvaluator`: (Mandatory) The following boolean operations are supported: *eq* (equals), *gt* (greater than), *lt* (less than), *gteq* (greater than or equal to), *lteq* (less than or equal to), *and*, *or, eor* (exclusive or), and *not*. Boolean evaluators can be nested with other evaluators to form complex boolean logic.
* `booleanEvaluator`: (Mandatory) The following boolean operations are supported: `eq` (equals), `gt` (greater than), `lt` (less than), `gteq` (greater than or equal to), `lteq` (less than or equal to), `and`, `or`, `eor` (exclusive or), and `not`. Boolean evaluators can be nested with other evaluators to form complex boolean logic.
The comparison evaluators compare the value in a specific field with a value, whether a string, number, or boolean. For example, *eq*(field1, 10) returns true if *field1* is equal to 10.
The comparison evaluators compare the value in a specific field with a value, whether a string, number, or boolean. For example, `eq(field1, 10)` returns `true` if `field1` is equal to 10.
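As an illustration of nesting boolean evaluators, the sketch below (assuming a hypothetical numeric field `price_f` in `collection1`) keeps only tuples whose price falls between 10 and 20:

[source,text]
----
having(search(collection1, q="*:*", fl="id,price_f", sort="price_f desc"),
       and(gt(price_f, 10), lt(price_f, 20)))
----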
=== having Syntax
@ -1001,7 +1001,7 @@ The expression above shows a `parallel` function wrapping a `reduce` function. T
The `priority` function is a simple priority scheduler for the <<executor>> function. The `executor` function doesn't directly have a concept of task prioritization; instead it simply executes tasks in the order that they are read from its underlying stream. The `priority` function provides the ability to schedule a higher priority task ahead of lower priority tasks that were submitted earlier.
The `priority` function wraps two <<stream-sources.adoc#topic,topics>> that are both emitting tuples that contain streaming expressions to execute. The first topic is considered the higher priority task queue.
The `priority` function wraps two <<stream-source-reference.adoc#topic,topics>> that are both emitting tuples that contain streaming expressions to execute. The first topic is considered the higher priority task queue.
Each time the `priority` function is called, it checks the higher priority task queue to see if there are any tasks to execute. If tasks are waiting in the higher priority queue then the priority function will emit the higher priority tasks. If there are no high priority tasks to run, the lower priority queue tasks are emitted.
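A sketch of this pattern is shown below; the collection, field, and id names are hypothetical:

[source,text]
----
priority(topic(checkpoints, tasks, q="priority:high", fl="id,expr_s", id="highPriorityTasks"),
         topic(checkpoints, tasks, q="priority:low", fl="id,expr_s", id="lowPriorityTasks"))
----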

View File

@ -1,6 +1,6 @@
= Stream Source Reference
:page-shortname: stream-sources
:page-permalink: stream-sources.html
:page-shortname: stream-source-reference
:page-permalink: stream-source-reference.html
:page-tocclass: right
:page-toclevels: 1
// Licensed to the Apache Software Foundation (ASF) under one
@ -20,6 +20,7 @@
// specific language governing permissions and limitations
// under the License.
== search
@ -36,7 +37,7 @@ This expression allows you to specify a request handler using the `qt` parameter.
* `zkHost`: Only needs to be defined if the collection being searched is found in a different zkHost than the local stream handler.
* `qt`: Specifies the query type, or request handler, to use. Set this to `/export` to work with large result sets. The default is `/select`.
* `rows`: (Mandatory with the `/select` handler) The rows parameter specifies how many rows to return. This parameter is only needed with the `/select` handler (which is the default) since the `/export` handler always returns all rows.
* `partitionKeys`: Comma delimited list of keys to partition the search results by. To be used with the parallel function for parallelizing operations across worker nodes. See the <<stream-decorators.adoc#parallel,parallel>> function for details.
* `partitionKeys`: Comma delimited list of keys to partition the search results by. To be used with the parallel function for parallelizing operations across worker nodes. See the <<stream-decorator-reference.adoc#parallel,parallel>> function for details.
=== search Syntax
@ -251,7 +252,7 @@ knn(collection1,
== model
The `model` function retrieves and caches logistic regression text classification models that are stored in a SolrCloud collection. The `model` function is designed to work with models that are created by the <<train,train function>>, but can also be used to retrieve text classification models trained outside of Solr, as long as they conform to the specified format. After the model is retrieved it can be used by the <<stream-decorators.adoc#classify,classify function>> to classify documents.
The `model` function retrieves and caches logistic regression text classification models that are stored in a SolrCloud collection. The `model` function is designed to work with models that are created by the <<train,train function>>, but can also be used to retrieve text classification models trained outside of Solr, as long as they conform to the specified format. After the model is retrieved it can be used by the <<stream-decorator-reference.adoc#classify,classify function>> to classify documents.
A single model tuple is fetched and returned based on the *id* parameter. The model is retrieved by matching the *id* parameter with a model name in the index. If more than one iteration of the named model is stored in the index, the highest iteration is selected.
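For example, the sketch below (collection and model names are hypothetical) fetches a stored model by name and caches it for five seconds:

[source,text]
----
model(modelCollection, id="myModel", cacheMillis="5000")
----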
@ -384,7 +385,7 @@ stream decorator to perform parallel relational algebra. When used in parallel m
* `fl`: (Mandatory) The list of fields to return.
* `sort`: (Mandatory) The sort criteria.
* `zkHost`: Only needs to be defined if the collection being searched is found in a different zkHost than the local stream handler.
* `partitionKeys`: Comma delimited list of keys to partition the search results by. To be used with the parallel function for parallelizing operations across worker nodes. See the <<stream-decorators.adoc#parallel,parallel>> function for details.
* `partitionKeys`: Comma delimited list of keys to partition the search results by. To be used with the parallel function for parallelizing operations across worker nodes. See the <<stream-decorator-reference.adoc#parallel,parallel>> function for details.
=== shuffle Syntax
@ -536,5 +537,3 @@ the tuple stream from the Stream Expression into a list of Tuples.
tuple(a=add(1,1),
b=search(collection1, q="cat:a", fl="a, b, c", sort="a desc"))
----

View File

@ -1,7 +1,7 @@
= Streaming Expressions
:page-shortname: streaming-expressions
:page-permalink: streaming-expressions.html
:page-children: stream-sources, stream-decorators, stream-evaluators, statistical-programming, graph-traversal
:page-children: stream-source-reference, stream-decorator-reference, stream-evaluator-reference, statistical-programming, graph-traversal
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@ -121,12 +121,12 @@ Because streaming expressions relies on the `/export` handler, many of the field
Stream sources originate streams. The most commonly used one of these is `search`, which does a query.
A full reference to all available source expressions is available in <<stream-sources.adoc#stream-sources,Stream Sources>>.
A full reference to all available source expressions is available in <<stream-source-reference.adoc#stream-source-reference,Stream Source Reference>>.
=== About Stream Decorators
Stream decorators wrap other stream functions or perform operations on a stream.
A full reference to all available decorator expressions is available in <<stream-decorators.adoc#stream-decorators,Stream Decorators>>.
A full reference to all available decorator expressions is available in <<stream-decorator-reference.adoc#stream-decorator-reference,Stream Decorator Reference>>.
=== About Stream Evaluators
@ -141,4 +141,4 @@ In cases where you want to use raw values as part of an evaluation you will need
If you wish to use a raw string as part of an evaluation, you will want to consider using the `raw(string)` evaluator. This will always return the raw value, no matter what is entered.
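For instance, with a hypothetical field `fieldA`, the sketch below compares the field's value against a literal string rather than against another field:

[source,text]
----
eq(fieldA, raw(fieldA))
----

Here `raw(fieldA)` always evaluates to the literal string "fieldA", so the comparison is true only for tuples whose `fieldA` value is exactly that string.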
A full reference to all available evaluator expressions is available in <<stream-evaluators.adoc#stream-evaluators,Stream Evaluators>>.
A full reference to all available evaluator expressions is available in <<stream-evaluator-reference.adoc#stream-evaluator-reference,Stream Evaluator Reference>>.

View File

@ -101,31 +101,37 @@ If documents to be elevated are not defined in the `elevate.xml` file, they shou
For debugging it may be useful to see results with and without the elevated docs. To hide results, use `enableElevation=false`:
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&enableElevation=true`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&enableElevation=true
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&enableElevation=false`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&enableElevation=false
=== The forceElevation Parameter
You can force elevation during runtime by adding `forceElevation=true` to the query URL:
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&enableElevation=true&forceElevation=true`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&enableElevation=true&forceElevation=true
=== The exclusive Parameter
You can force Solr to return only the results specified in the elevation file by adding `exclusive=true` to the URL:
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&exclusive=true`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&debugQuery=true&exclusive=true
=== Document Transformers and the markExcludes Parameter
The `[elevated]` <<transforming-result-documents.adoc#transforming-result-documents,Document Transformer>> can be used to annotate each document with information about whether or not it was elevated:
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&fl=id,[elevated]`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&fl=id,[elevated]
Likewise, it can be helpful when troubleshooting to see all matching documents including documents that the elevation configuration would normally exclude. This is possible by using the `markExcludes=true` parameter, and then using the `[excluded]` transformer:
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&markExcludes=true&fl=id,[elevated],[excluded]`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&markExcludes=true&fl=id,[elevated],[excluded]
=== The elevateIds and excludeIds Parameters
@ -133,13 +139,15 @@ When the elevation component is in use, the pre-configured list of elevations fo
For example, in the request below documents 3007WFP and 9885A004 will be elevated, and document IW-02 will be excluded -- regardless of what elevations or exclusions are configured for the query "cable" in elevate.xml:
`\http://localhost:8983/solr/techproducts/elevate?q=cable&df=text&excludeIds=IW-02&elevateIds=3007WFP,9885A004`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=cable&df=text&excludeIds=IW-02&elevateIds=3007WFP,9885A004
If either one of these parameters is specified at request time, the entire elevation configuration for the query is ignored.
For example, in the request below documents IW-02 and F8V7067-APL-KIT will be elevated, and no documents will be excluded regardless of what elevations or exclusions are configured for the query "ipod" in elevate.xml:
`\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&elevateIds=IW-02,F8V7067-APL-KIT`
[source,text]
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&elevateIds=IW-02,F8V7067-APL-KIT
=== The fq Parameter with Elevation

View File

@ -44,7 +44,10 @@ Specifies a field for which statistics should be generated. This parameter may b
The query below demonstrates computing stats against two different numeric fields, as well as stats over the results of a `termfreq()` function call using the `text` field:
`\http://localhost:8983/solr/techproducts/select?q=*:*&stats=true&stats.field={!func}termfreq('text','memory')&stats.field=price&stats.field=popularity&rows=0&indent=true`
[source,text]
----
http://localhost:8983/solr/techproducts/select?q=*:*&wt=xml&stats=true&stats.field={!func}termfreq('text','memory')&stats.field=price&stats.field=popularity&rows=0&indent=true
----
[source,xml]
----
@ -159,7 +162,8 @@ Additional "Expert" local params are supported in some cases for affecting the b
Here we compute some statistics for the price field. The min, max, mean, 90th, and 99th percentile price values are computed against all products that are in stock (`q=*:*` and `fq=inStock:true`), and independently all of the default statistics are computed against all products regardless of whether they are in stock or not (by excluding that filter).
`\http://localhost:8983/solr/techproducts/select?q=*:*&fq={!tag=stock_check}inStock:true&stats=true&stats.field={!ex=stock_check+key=instock_prices+min=true+max=true+mean=true+percentiles='90,99'}price&stats.field={!key=all_prices}price&rows=0&indent=true`
[source,text]
http://localhost:8983/solr/techproducts/select?q=*:*&fq={!tag=stock_check}inStock:true&stats=true&stats.field={!ex=stock_check+key=instock_prices+min=true+max=true+mean=true+percentiles='90,99'}price&stats.field={!key=all_prices}price&rows=0&indent=true
[source,xml]
----

View File

@ -70,7 +70,8 @@ Once your handler is defined, you may use in conjunction with any schema (that h
The example below shows an invocation of this component using the above configuration:
`\http://localhost:8983/solr/techproducts/tvrh?q=*:*&start=0&rows=10&fl=id,includes`
[source,text]
http://localhost:8983/solr/techproducts/tvrh?q=*:*&start=0&rows=10&fl=id,includes&wt=xml
[source,xml]
----

View File

@ -161,7 +161,10 @@ All of the following sample queries work with Solr's "`bin/solr -e techproducts`
=== Get Top 10 Terms
This query requests the first ten terms in the name field: `\http://localhost:8983/solr/techproducts/terms?terms.fl=name`
This query requests the first ten terms in the name field:
[source,text]
http://localhost:8983/solr/techproducts/terms?terms.fl=name&wt=xml
Results:
@ -191,7 +194,10 @@ Results:
=== Get First 10 Terms Starting with Letter 'a'
This query requests the first ten terms in the name field, in index order (instead of the top 10 results by document count): `\http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.lower=a&terms.sort=index`
This query requests the first ten terms in the name field, in index order (instead of the top 10 results by document count):
[source,text]
http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.lower=a&terms.sort=index&wt=xml
Results:
@ -240,7 +246,8 @@ Results:
If the <<suggester.adoc#suggester,Suggester>> doesn't suit your needs, you can use the Terms component in Solr to build a similar feature for your own search application. Simply submit a query specifying whatever characters the user has typed so far as a prefix. For example, if the user has typed "at", the search engine's interface would submit the following query:
`\http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.prefix=at`
[source,text]
http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.prefix=at&wt=xml
Result:
@ -260,7 +267,10 @@ Result:
</response>
----
You can use the parameter `omitHeader=true` to omit the response header from the query response, like in this example, which also returns the response in JSON format: `\http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.prefix=at&omitHeader=true`
You can use the parameter `omitHeader=true` to omit the response header from the query response, like in this example, which also returns the response in JSON format:
[source,text]
http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.prefix=at&omitHeader=true
Result:

View File

@ -154,13 +154,21 @@ http://localhost:8983/solr/my_collection/select?q=*:*&facet.range=my_date_field&
== More DateRangeField Details
`DateRangeField` is almost a drop-in replacement for places where `DatePointField` is used. The only difference is that Solr's XML or SolrJ response formats will expose the stored data as a String instead of a Date. The underlying index data for this field will be a bit larger. Queries that align to units of time a second on up should be faster than TrieDateField, especially if it's in UTC. But the main point of DateRangeField as its name suggests is to allow indexing date ranges. To do that, simply supply strings in the format shown above. It also supports specifying 3 different relational predicates between the indexed data, and the query range: `Intersects` (default), `Contains`, `Within`. You can specify the predicate by querying using the `op` local-params parameter like so:
`DateRangeField` is almost a drop-in replacement for places where `DatePointField` is used. The only difference is that Solr's XML or SolrJ response formats will expose the stored data as a String instead of a Date. The underlying index data for this field will be a bit larger. Queries that align to units of time of a second or coarser should be faster than TrieDateField, especially if it's in UTC.
The main point of `DateRangeField`, as its name suggests, is to allow indexing date ranges. To do that, simply supply strings in the format shown above. It also supports specifying 3 different relational predicates between the indexed data and the query range:
* `Intersects` (default)
* `Contains`
* `Within`
You can specify the predicate by querying using the `op` local-params parameter like so:
[source,text]
----
fq={!field f=dateRange op=Contains}[2013 TO 2018]
----
Unlike most/all local-params, `op` is actually _not_ defined by any query parser (`field`), it is defined by the field type `DateRangeField`. In that example, it would find documents with indexed ranges that _contain_ (or equals) the range 2013 thru 2018. Multi-valued overlapping indexed ranges in a document are effectively coalesced.
Unlike most local parameters, `op` is actually _not_ defined by any query parser (`field`); it is defined by the field type, in this case `DateRangeField`. In the above example, it would find documents with indexed ranges that _contain_ (or equal) the range 2013 thru 2018. Multi-valued overlapping indexed ranges in a document are effectively coalesced.
For a DateRangeField example use-case, see http://wiki.apache.org/solr/DateRangeField[see Solr's community wiki].

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.commons.math3.stat.StatUtils;
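/**
 * Streaming evaluator that computes the arithmetic mean of a list of numbers;
 * for example, the expression mean(array(1,2,3,4,5)) evaluates to 3.0.
 */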
public class MeanEvaluator extends RecursiveObjectEvaluator implements OneValueWorker {
protected static final long serialVersionUID = 1L;
public MeanEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
super(expression, factory);
if(1 != containedEvaluators.size()){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting exactly 1 value but found %d",expression,containedEvaluators.size()));
}
}
@Override
public Object doWork(Object value) throws IOException{
if(null == value){
throw new IOException(String.format(Locale.ROOT, "Unable to find %s(...) because the value is null", constructingFactory.getFunctionName(getClass())));
}
else if(value instanceof List){
List<Number> c = (List<Number>) value;
double[] data = new double[c.size()];
for(int i=0; i< c.size(); i++) {
data[i] = c.get(i).doubleValue();
}
return StatUtils.mean(data);
}
else{
throw new IOException(String.format(Locale.ROOT, "Unable to find %s(...) because the value is not a collection, instead a %s was found", constructingFactory.getFunctionName(getClass()), value.getClass().getSimpleName()));
}
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.distribution.WeibullDistribution;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
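/**
 * Streaming evaluator that builds a commons-math WeibullDistribution from a shape (first value)
 * and a scale (second value), e.g. weibullDistribution(1, 10); the resulting distribution can
 * then be consumed by other evaluators such as sample().
 */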
public class WeibullDistributionEvaluator extends RecursiveNumericEvaluator implements TwoValueWorker {
private static final long serialVersionUID = 1;
public WeibullDistributionEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
super(expression, factory);
}
@Override
public Object doWork(Object first, Object second) throws IOException{
if(null == first){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
}
if(null == second){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));
}
Number shape = (Number)first;
Number scale = (Number)second;
return new WeibullDistribution(shape.doubleValue(), scale.doubleValue());
}
}

View File

@ -6552,6 +6552,53 @@ public class StreamExpressionTest extends SolrCloudTestCase {
}
@Test
public void testWeibullDistribution() throws Exception {
String cexpr = "let(echo=true, " +
"a=percentile(sample(weibullDistribution(1, 10),5000), 50), " +
"b=percentile(sample(weibullDistribution(1, 50),5000), 50), " +
"c=percentile(sample(weibullDistribution(1, 100),5000), 50)," +
"d=percentile(sample(weibullDistribution(4, 10),5000), 50)," +
"e=percentile(sample(weibullDistribution(8, 10),5000), 50))";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
paramsLoc.set("expr", cexpr);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
TupleStream solrStream = new SolrStream(url, paramsLoc);
StreamContext context = new StreamContext();
solrStream.setStreamContext(context);
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1);
Number a = (Number)tuples.get(0).get("a");
Number b = (Number)tuples.get(0).get("b");
Number c = (Number)tuples.get(0).get("c");
Number d = (Number)tuples.get(0).get("d");
Number e = (Number)tuples.get(0).get("e");
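// The median of a Weibull distribution grows with the scale parameter (a < b < c)
// and, for a fixed scale, with the shape parameter (a < d, d < e), so the sampled
// 50th percentiles should preserve that ordering.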
assertTrue(a.doubleValue() < b.doubleValue());
assertTrue(b.doubleValue() < c.doubleValue());
assertTrue(a.doubleValue() < d.doubleValue());
assertTrue(d.doubleValue() < e.doubleValue());
}
@Test
public void testMean() throws Exception {
String cexpr = "mean(array(1,2,3,4,5))";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
paramsLoc.set("expr", cexpr);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
TupleStream solrStream = new SolrStream(url, paramsLoc);
StreamContext context = new StreamContext();
solrStream.setStreamContext(context);
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1);
Number mean = (Number)tuples.get(0).get("return-value");
assertEquals(mean.doubleValue(), 3.0D, 0.0D);
}
@Test
public void testScale() throws Exception {
UpdateRequest updateRequest = new UpdateRequest();