diff --git a/dev-tools/scripts/addVersion.py b/dev-tools/scripts/addVersion.py index 5ce6bde7bba..745cfd3a49a 100644 --- a/dev-tools/scripts/addVersion.py +++ b/dev-tools/scripts/addVersion.py @@ -134,7 +134,7 @@ def update_example_solrconfigs(new_version): print(' updating example solrconfig.xml files') matcher = re.compile('') - paths = ['solr/server/solr/configsets', 'solr/example'] + paths = ['solr/server/solr/configsets', 'solr/example', 'solr/core/src/test-files/solr/configsets/_default'] for path in paths: if not os.path.isdir(path): raise RuntimeError("Can't locate configset dir (layout change?) : " + path) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e3decb12bf1..3cb139848c9 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -58,8 +58,11 @@ New Features ---------------------- * SOLR-11019: Add addAll Stream Evaluator (Joel Bernstein) + * SOLR-10996: Implement TriggerListener API (ab, shalin) +* SOLR-11046: Add residuals Stream Evaluator (Joel Bernstein) + Bug Fixes ---------------------- @@ -68,7 +71,8 @@ Bug Fixes Optimizations ---------------------- -(No Changes) +* SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging. + (Michael Braun via Christine Poerschke) Other Changes ---------------------- @@ -80,6 +84,8 @@ Other Changes * SOLR-10748: Make stream.body configurable and disabled by default (janhoy) +* SOLR-10964: Reduce SolrIndexSearcher casting in LTRRescorer. (Christine Poerschke) + ================== 7.0.0 ================== Versions of Major Components @@ -289,6 +295,8 @@ New Features * SOLR-10965: New ExecutePlanAction for autoscaling which executes the operations computed by ComputePlanAction against the cluster. (shalin) +* SOLR-10282: bin/solr support for enabling Kerberos authentication (Ishan Chattopadhyaya) + Bug Fixes ---------------------- * SOLR-9262: Connection and read timeouts are being ignored by UpdateShardHandler after SOLR-4509. @@ -347,6 +355,13 @@ Bug Fixes * SOLR-10826: Fix CloudSolrClient to expand the collection parameter correctly (Tim Owen via Varun Thacker) +* SOLR-11039: Next button in Solr admin UI for collection list pagination does not work. (janhoy) + +* SOLR-11041: MoveReplicaCmd do not specify ulog dir in case of HDFS (Cao Manh Dat) + +* SOLR-11045: The new replica created by MoveReplica will have to have same name and coreName as the + old one in case of HDFS (Cao Manh Dat) + Optimizations ---------------------- @@ -482,6 +497,10 @@ Other Changes - SOLR-10977: Randomize the usage of Points based numerics in schema15.xml and all impacted tests (hossman) - SOLR-10979: Randomize PointFields in schema-docValues*.xml and all affected tests (hossman) - SOLR-10989: Randomize PointFields and general cleanup in schema files where some Trie fields were unused (hossman) + - SOLR-11048: Randomize PointsFields in schema-add-schema-fields-update-processor.xml in solr-core collection1 and + all affected tests (Anshum Gupta) + - SOLR-11059: Randomize PointFields in schema-blockjoinfacetcomponent.xml and all related tests (Anshum Gupta) + - SOLR-11060: Randomize PointFields in schema-custom-field.xml and all related tests (Anshum Gupta) * SOLR-6807: Changed requestDispatcher's handleSelect to default to false, thus ignoring "qt". Simplified configs to not refer to handleSelect or "qt". 
Switch all tests that assumed true to assume false @@ -498,6 +517,13 @@ Other Changes * SOLR-11016: Fix TestCloudJSONFacetJoinDomain test-only bug (hossman) +* SOLR-11021: The elevate.xml config-file is made optional in the ElevationComponent. + The default configset doesn't ship with a elevate.xml file anymore (Varun Thacker) + +* SOLR-10898: Fix SOLR-10898 to not deterministicly fail 1/512 runs (hossman) + +* SOLR-10796: TestPointFields: increase randomized testing of non-trivial values. (Steve Rowe) + ================== 6.7.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. @@ -630,6 +656,8 @@ when using one of Exact*StatsCache (Mikhail Khludnev) * SOLR-10914: RecoveryStrategy's sendPrepRecoveryCmd can get stuck for 5 minutes if leader is unloaded. (shalin) +* SOLR-11024: ParallelStream should set the StreamContext when constructing SolrStreams (Joel Bernstein) + Optimizations ---------------------- * SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1) diff --git a/solr/bin/solr b/solr/bin/solr index c9aad2f2935..3d2834057ff 100755 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -555,20 +555,23 @@ function print_usage() { echo "" echo "Usage: solr auth enable [-type basicAuth] -credentials user:pass [-blockUnknown ] [-updateIncludeFileOnly ]" echo " solr auth enable [-type basicAuth] -prompt [-blockUnknown ] [-updateIncludeFileOnly ]" + echo " solr auth enable -type kerberos -config "" [-updateIncludeFileOnly ]" echo " solr auth disable [-updateIncludeFileOnly ]" echo "" - echo " -type The authentication mechanism to enable. Defaults to 'basicAuth'." + echo " -type The authentication mechanism (basicAuth or kerberos) to enable. Defaults to 'basicAuth'." echo "" - echo " -credentials The username and password of the initial user" + echo " -credentials The username and password of the initial user. Applicable for basicAuth only." echo " Note: only one of -prompt or -credentials must be provided" echo "" - echo " -prompt Prompts the user to provide the credentials" + echo " -config "" Configuration parameters (Solr startup parameters). Required and applicable only for Kerberos" + echo "" + echo " -prompt Prompts the user to provide the credentials. Applicable for basicAuth only." echo " Note: only one of -prompt or -credentials must be provided" echo "" echo " -blockUnknown When true, this blocks out access to unauthenticated users. When not provided," echo " this defaults to false (i.e. unauthenticated users can access all endpoints, except the" echo " operations like collection-edit, security-edit, core-admin-edit etc.). Check the reference" - echo " guide for Basic Authentication for more details." + echo " guide for Basic Authentication for more details. Applicable for basicAuth only." echo "" echo " -updateIncludeFileOnly Only update the solr.in.sh or solr.in.cmd file, and skip actual enabling/disabling" echo " authentication (i.e. don't update security.json)" @@ -975,6 +978,14 @@ if [[ "$SCRIPT_CMD" == "create" || "$SCRIPT_CMD" == "create_core" || "$SCRIPT_CM exit 1 fi + if [ "$CREATE_CONFDIR" == "_default" ]; then + echo "WARNING: Using _default configset. Data driven schema functionality is enabled by default, which is" + echo " NOT RECOMMENDED for production use." 
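Aside: the warning added to bin/solr here points users at a Config API call for switching off automatic, data-driven field creation (the exact curl command appears in the echo lines just below). The following is a minimal Java sketch of the same request using only the JDK's HttpURLConnection; the host, port and collection name are placeholder assumptions, not values from this patch.

    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;

    // Posts a set-user-property command to a collection's Config API, mirroring the curl
    // call printed by the bin/solr warning. Assumes a local Solr node and a collection
    // named "mycollection"; adjust both for a real cluster.
    public class DisableAutoCreateFields {
      public static void main(String[] args) throws Exception {
        String configApi = "http://localhost:8983/solr/mycollection/config";
        String body = "{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}";

        HttpURLConnection conn = (HttpURLConnection) new URL(configApi).openConnection();
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json");
        conn.setDoOutput(true);
        try (OutputStream out = conn.getOutputStream()) {
          out.write(body.getBytes(StandardCharsets.UTF_8));
        }
        System.out.println("HTTP " + conn.getResponseCode()); // 200 when the property is set
      }
    }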
+ echo + echo " To turn it off:" + echo " curl http://$SOLR_TOOL_HOST:$CREATE_PORT/solr/$CREATE_NAME/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'" + fi + if [[ "$(whoami)" == "root" ]] && [[ "$FORCE" == "false" ]] ; then echo "WARNING: Creating cores as the root user can cause Solr to fail and is not advisable. Exiting." echo " If you started Solr as root (not advisable either), force core creation by adding argument -force" @@ -1242,6 +1253,11 @@ if [[ "$SCRIPT_CMD" == "auth" ]]; then AUTH_PARAMS=("${AUTH_PARAMS[@]}" "-credentials" "$AUTH_CREDENTIALS") shift 2 ;; + -config) + AUTH_CONFIG="`echo $2| base64`" + AUTH_PARAMS=("${AUTH_PARAMS[@]}" "-config" "$AUTH_CONFIG") + shift 2 + ;; -solrIncludeFile) SOLR_INCLUDE="$2" shift 2 diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index b268f90a767..cfc21b97e58 100644 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -1426,6 +1426,14 @@ if "!CREATE_PORT!"=="" ( goto err ) + +if "!CREATE_CONFDIR!"=="_default" ( + echo WARNING: Using _default configset. Data driven schema functionality is enabled by default, which is + echo NOT RECOMMENDED for production use. + echo To turn it off: + echo curl http://%SOLR_TOOL_HOST%:!CREATE_PORT!/solr/!CREATE_NAME!/config -d '{"set-user-property": {"update.autoCreateFields":"false"}}' +) + if "%SCRIPT_CMD%"=="create_core" ( "%JAVA%" %SOLR_SSL_OPTS% %AUTHC_OPTS% %SOLR_ZK_CREDS_AND_ACLS% -Dsolr.install.dir="%SOLR_TIP%" ^ -Dlog4j.configuration="file:%DEFAULT_SERVER_DIR%\scripts\cloud-scripts\log4j.properties" ^ diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java index 33c9a3624ef..59b764bb935 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java @@ -116,8 +116,7 @@ public class LTRRescorer extends Rescorer { final LTRScoringQuery.ModelWeight modelWeight = (LTRScoringQuery.ModelWeight) searcher .createNormalizedWeight(scoringQuery, true); - final SolrIndexSearcher solrIndexSearch = (SolrIndexSearcher) searcher; - scoreFeatures(solrIndexSearch, firstPassTopDocs,topN, modelWeight, hits, leaves, reranked); + scoreFeatures(searcher, firstPassTopDocs,topN, modelWeight, hits, leaves, reranked); // Must sort all documents that we reranked, and then select the top Arrays.sort(reranked, new Comparator() { @Override @@ -138,7 +137,7 @@ public class LTRRescorer extends Rescorer { return new TopDocs(firstPassTopDocs.totalHits, reranked, reranked[0].score); } - public void scoreFeatures(SolrIndexSearcher solrIndexSearch, TopDocs firstPassTopDocs, + public void scoreFeatures(IndexSearcher indexSearcher, TopDocs firstPassTopDocs, int topN, LTRScoringQuery.ModelWeight modelWeight, ScoreDoc[] hits, List leaves, ScoreDoc[] reranked) throws IOException { @@ -183,8 +182,8 @@ public class LTRRescorer extends Rescorer { reranked[hitUpto] = hit; // if the heap is not full, maybe I want to log the features for this // document - if (featureLogger != null) { - featureLogger.log(hit.doc, scoringQuery, solrIndexSearch, + if (featureLogger != null && indexSearcher instanceof SolrIndexSearcher) { + featureLogger.log(hit.doc, scoringQuery, (SolrIndexSearcher)indexSearcher, modelWeight.getFeaturesInfo()); } } else if (hitUpto == topN) { @@ -200,8 +199,8 @@ public class LTRRescorer extends Rescorer { if (hit.score > reranked[0].score) { reranked[0] = hit; heapAdjust(reranked, topN, 0); - if (featureLogger != null) { - 
featureLogger.log(hit.doc, scoringQuery, solrIndexSearch, + if (featureLogger != null && indexSearcher instanceof SolrIndexSearcher) { + featureLogger.log(hit.doc, scoringQuery, (SolrIndexSearcher)indexSearcher, modelWeight.getFeaturesInfo()); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java index ac09621b03c..c05072d5bf0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java +++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java @@ -31,6 +31,7 @@ import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.SolrResourceLoader; @@ -64,10 +65,11 @@ public class CloudUtil { String cnn = replica.getName(); String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP); + boolean isSharedFs = replica.getStr(CoreAdminParams.DATA_DIR) != null; log.debug("compare against coreNodeName={} baseUrl={}", cnn, baseUrl); if (thisCnn != null && thisCnn.equals(cnn) - && !thisBaseUrl.equals(baseUrl)) { + && !thisBaseUrl.equals(baseUrl) && isSharedFs) { if (cc.getLoadedCoreNames().contains(desc.getName())) { cc.unload(desc.getName()); } diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java index 9bf48021bff..6f1b42caa9f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java @@ -324,6 +324,15 @@ public class CreateCollectionCmd implements Cmd { ocmh.forwardToAutoScaling(AutoScaling.AUTO_ADD_REPLICAS_TRIGGER_DSL); } log.debug("Finished create command on all shards for collection: {}", collectionName); + + // Emit a warning about production use of data driven functionality + boolean defaultConfigSetUsed = message.getStr(COLL_CONF) == null || + message.getStr(COLL_CONF).equals(ConfigSetsHandlerApi.DEFAULT_CONFIGSET_NAME); + if (defaultConfigSetUsed) { + results.add("warning", "Using _default configset. Data driven schema functionality" + + " is enabled by default, which is NOT RECOMMENDED for production use. 
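Aside on the LTRRescorer change above: scoreFeatures() now takes the plain IndexSearcher it was given and only performs feature logging when that searcher is in fact a SolrIndexSearcher. A minimal sketch of the guard-and-cast pattern; Searcher and SpecializedSearcher are made-up stand-ins, not the real Lucene and Solr classes.

    // Only the shape of the instanceof guard is taken from the patch; the types are stand-ins.
    interface Searcher { }

    class SpecializedSearcher implements Searcher {
      void logFeatures(int doc) { System.out.println("logged features for doc " + doc); }
    }

    public class GuardedFeatureLogging {
      static void scoreFeatures(Searcher searcher, int doc) {
        // scoring work that needs only the general Searcher API would happen here ...
        // ... while the optional logging path runs only when the runtime type supports it
        if (searcher instanceof SpecializedSearcher) {
          ((SpecializedSearcher) searcher).logFeatures(doc);
        }
      }

      public static void main(String[] args) {
        scoreFeatures(new SpecializedSearcher(), 7); // logs features
        scoreFeatures(new Searcher() { }, 7);        // scores without logging
      }
    }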
To turn it off:" + + " curl http://{host:port}/solr/" + collectionName + "/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'"); + } } } catch (SolrException ex) { throw ex; diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java index ee019b45380..117edf92d18 100644 --- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java @@ -34,6 +34,8 @@ import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.Utils; +import org.apache.solr.update.UpdateLog; +import org.apache.solr.util.TimeOut; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -105,18 +107,15 @@ public class MoveReplicaCmd implements Cmd{ } assert slice != null; Object dataDir = replica.get("dataDir"); - final String ulogDir = replica.getStr("ulogDir"); if (dataDir != null && dataDir.toString().startsWith("hdfs:/")) { - moveHdfsReplica(clusterState, results, dataDir.toString(), ulogDir, targetNode, async, coll, replica, slice, timeout); + moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll, replica, slice, timeout); } else { moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice, timeout); } } - private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String ulogDir, String targetNode, String async, + private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async, DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception { - String newCoreName = Assign.buildCoreName(coll, slice.getName(), replica.getType()); - ZkNodeProps removeReplicasProps = new ZkNodeProps( COLLECTION_PROP, coll.getName(), SHARD_ID_PROP, slice.getName(), @@ -135,16 +134,32 @@ public class MoveReplicaCmd implements Cmd{ return; } + TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS); + while (!timeOut.hasTimedOut()) { + coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName()); + if (coll.getReplica(replica.getName()) != null) { + Thread.sleep(100); + } else { + break; + } + } + if (timeOut.hasTimedOut()) { + results.add("failure", "Still see deleted replica in clusterstate!"); + return; + } + + String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR); ZkNodeProps addReplicasProps = new ZkNodeProps( COLLECTION_PROP, coll.getName(), SHARD_ID_PROP, slice.getName(), CoreAdminParams.NODE, targetNode, - CoreAdminParams.NAME, newCoreName, - CoreAdminParams.DATA_DIR, dataDir, - CoreAdminParams.ULOG_DIR, ulogDir); + CoreAdminParams.CORE_NODE_NAME, replica.getName(), + CoreAdminParams.NAME, replica.getCoreName(), + CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)), + CoreAdminParams.DATA_DIR, dataDir); if(async!=null) addReplicasProps.getProperties().put(ASYNC, async); NamedList addResult = new NamedList(); - ocmh.addReplica(clusterState, addReplicasProps, addResult, null); + ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, addResult, null); if (addResult.get("failure") != null) { String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" + " on node=%s", coll.getName(), slice.getName(), targetNode); @@ -153,7 +168,7 @@ public class MoveReplicaCmd implements Cmd{ return; 
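Aside on the wait loop added to MoveReplicaCmd above: after the old replica is deleted, the command polls the cluster state for up to 20 seconds, sleeping 100 ms between checks, and gives up with the "Still see deleted replica" failure if the replica never disappears. A simplified JDK-only sketch of that pattern; the Supplier is a stand-in assumption for the real "is the replica still in the cluster state?" check.

    import java.util.concurrent.TimeUnit;
    import java.util.function.Supplier;

    // Polls a condition until it holds or a deadline passes, mirroring the 20 s / 100 ms
    // wait-until-the-old-replica-is-gone loop in MoveReplicaCmd.
    public class WaitUntilGone {
      static boolean waitUntil(Supplier<Boolean> replicaGone, long timeout, TimeUnit unit)
          throws InterruptedException {
        final long deadline = System.nanoTime() + unit.toNanos(timeout);
        while (System.nanoTime() < deadline) {
          if (replicaGone.get()) {
            return true;     // the old replica is no longer in the cluster state
          }
          Thread.sleep(100); // same poll interval the command uses
        }
        return false;        // caller reports the "Still see deleted replica" failure
      }

      public static void main(String[] args) throws InterruptedException {
        long start = System.currentTimeMillis();
        boolean gone = waitUntil(() -> System.currentTimeMillis() - start > 300, 20, TimeUnit.SECONDS);
        System.out.println("replica gone before timeout: " + gone);
      }
    }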
} else { String successString = String.format(Locale.ROOT, "MOVEREPLICA action completed successfully, moved replica=%s at node=%s " + - "to replica=%s at node=%s", replica.getCoreName(), replica.getNodeName(), newCoreName, targetNode); + "to replica=%s at node=%s", replica.getCoreName(), replica.getNodeName(), replica.getCoreName(), targetNode); results.add("success", successString); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index bcd531a9948..d4f914d2f5f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -208,9 +208,9 @@ public class Overseer implements Closeable { @Override public void onEnqueue() throws Exception { if (!itemWasMoved[0]) { + workQueue.offer(data); stateUpdateQueue.poll(); itemWasMoved[0] = true; - workQueue.offer(data); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 6b64b835d71..66fa26a5b85 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -2250,10 +2250,7 @@ public class ZkController { DocCollection collection = clusterState.getCollectionOrNull(desc .getCloudDescriptor().getCollectionName()); if (collection != null) { - boolean autoAddReplicas = ClusterStateUtil.isAutoAddReplicas(getZkStateReader(), collection.getName()); - if (autoAddReplicas) { - CloudUtil.checkSharedFSFailoverReplaced(cc, desc); - } + CloudUtil.checkSharedFSFailoverReplaced(cc, desc); } } } diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index 3901c61bbb5..ea2a931f890 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -217,8 +217,8 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("scale", ScaleEvaluator.class) .withFunctionName("sequence", SequenceEvaluator.class) .withFunctionName("addAll", AddAllEvaluator.class) + .withFunctionName("residuals", ResidualsEvaluator.class) - // Boolean Stream Evaluators .withFunctionName("and", AndEvaluator.class) .withFunctionName("eor", ExclusiveOrEvaluator.class) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 256400e5c43..a4155ea6b4a 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -126,6 +126,7 @@ import static org.apache.solr.common.params.CoreAdminParams.DELETE_DATA_DIR; import static org.apache.solr.common.params.CoreAdminParams.DELETE_INDEX; import static org.apache.solr.common.params.CoreAdminParams.DELETE_INSTANCE_DIR; import static org.apache.solr.common.params.CoreAdminParams.INSTANCE_DIR; +import static org.apache.solr.common.params.CoreAdminParams.ULOG_DIR; import static org.apache.solr.common.params.ShardParams._ROUTE_; import static org.apache.solr.common.util.StrUtils.formatString; @@ -633,6 +634,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission CoreAdminParams.NAME, INSTANCE_DIR, DATA_DIR, + ULOG_DIR, REPLICA_TYPE); return copyPropertiesWithPrefix(req.getParams(), props, COLL_PROP_PREFIX); }), diff 
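Aside on the ulogDir handling in the MoveReplicaCmd hunk above: the replica's stored ulogDir ends with the transaction-log directory segment (UpdateLog.TLOG_NAME; the test added later in this patch appends it as "/tlog"), while ADDREPLICA expects the parent directory, so the command keeps everything before the last occurrence of that segment. A small standalone sketch of that computation; the HDFS path is invented for illustration.

    // Derives the parent ulog directory from a stored ulogDir that ends in the tlog segment.
    public class UlogDirTrim {
      static final String TLOG_NAME = "tlog"; // stands in for UpdateLog.TLOG_NAME

      static String parentUlogDir(String storedUlogDir) {
        int idx = storedUlogDir.lastIndexOf(TLOG_NAME);
        if (idx < 0) {
          return storedUlogDir; // defensive fallback, not expected for HDFS replicas
        }
        return storedUlogDir.substring(0, idx);
      }

      public static void main(String[] args) {
        // prints: hdfs://nn:8020/solr/coll/core_node1/data/
        System.out.println(parentUlogDir("hdfs://nn:8020/solr/coll/core_node1/data/tlog"));
      }
    }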
--git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 3f3dd5c99b8..6511c673d1b 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -204,53 +204,51 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore } core.addTransformerFactory(markerName, elevatedMarkerFactory); forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); - try { - synchronized (elevationCache) { - elevationCache.clear(); - String f = initArgs.get(CONFIG_FILE); - if (f == null) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent must specify argument: '" + CONFIG_FILE - + "' -- path to elevate.xml"); - } - boolean exists = false; - // check if using ZooKeeper - ZkController zkController = core.getCoreContainer().getZkController(); - if (zkController != null) { - // TODO : shouldn't have to keep reading the config name when it has been read before - exists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), f); - } else { - File fC = new File(core.getResourceLoader().getConfigDir(), f); - File fD = new File(core.getDataDir(), f); - if (fC.exists() == fD.exists()) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent missing config file: '" + f + "\n" - + "either: " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both."); + String f = initArgs.get(CONFIG_FILE); + if (f != null) { + try { + synchronized (elevationCache) { + elevationCache.clear(); + boolean exists = false; + + // check if using ZooKeeper + ZkController zkController = core.getCoreContainer().getZkController(); + if (zkController != null) { + // TODO : shouldn't have to keep reading the config name when it has been read before + exists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), f); + } else { + File fC = new File(core.getResourceLoader().getConfigDir(), f); + File fD = new File(core.getDataDir(), f); + if (fC.exists() == fD.exists()) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "QueryElevationComponent missing config file: '" + f + "\n" + + "either: " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both."); + } + if (fC.exists()) { + exists = true; + log.info("Loading QueryElevation from: " + fC.getAbsolutePath()); + Config cfg = new Config(core.getResourceLoader(), f); + elevationCache.put(null, loadElevationMap(cfg)); + } } - if (fC.exists()) { - exists = true; - log.info("Loading QueryElevation from: " + fC.getAbsolutePath()); - Config cfg = new Config(core.getResourceLoader(), f); - elevationCache.put(null, loadElevationMap(cfg)); - } - } - //in other words, we think this is in the data dir, not the conf dir - if (!exists) { - // preload the first data - RefCounted searchHolder = null; - try { - searchHolder = core.getNewestSearcher(false); - IndexReader reader = searchHolder.get().getIndexReader(); - getElevationMap(reader, core); - } finally { - if (searchHolder != null) searchHolder.decref(); + //in other words, we think this is in the data dir, not the conf dir + if (!exists) { + 
// preload the first data + RefCounted searchHolder = null; + try { + searchHolder = core.getNewestSearcher(false); + IndexReader reader = searchHolder.get().getIndexReader(); + getElevationMap(reader, core); + } finally { + if (searchHolder != null) searchHolder.decref(); + } } } + } catch (Exception ex) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Error initializing QueryElevationComponent.", ex); } - } catch (Exception ex) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error initializing QueryElevationComponent.", ex); } } diff --git a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java index 413584dce19..b4e76d494af 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java @@ -179,15 +179,14 @@ public class ExactStatsCache extends StatsCache { String termStatsString = StatsUtil.termStatsMapToString(statsMap); rb.rsp.add(TERM_STATS_KEY, termStatsString); if (LOG.isDebugEnabled()) { - LOG.debug("termStats=" + termStatsString + ", terms=" + terms + ", numDocs=" + searcher.maxDoc()); + LOG.debug("termStats={}, terms={}, numDocs={}", termStatsString, terms, searcher.maxDoc()); } } if (colMap.size() != 0){ String colStatsString = StatsUtil.colStatsMapToString(colMap); rb.rsp.add(COL_STATS_KEY, colStatsString); if (LOG.isDebugEnabled()) { - LOG.debug("collectionStats=" - + colStatsString + ", terms=" + terms + ", numDocs=" + searcher.maxDoc()); + LOG.debug("collectionStats={}, terms={}, numDocs={}", colStatsString, terms, searcher.maxDoc()); } } } catch (IOException e) { diff --git a/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java index ffcc99d0e31..99efb8d7530 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java @@ -136,7 +136,7 @@ public class LRUStatsCache extends ExactStatsCache { throws IOException { TermStats termStats = termStatsCache.get(term.toString()); if (termStats == null) { - LOG.debug("## Missing global termStats info: {}, using local", term.toString()); + LOG.debug("## Missing global termStats info: {}, using local", term); return localSearcher.localTermStatistics(term, context); } else { return termStats.toTermStatistics(); diff --git a/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java index 2eb3fc0e35a..90395f520fc 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java @@ -38,7 +38,7 @@ public class LocalStatsCache extends StatsCache { @Override public StatsSource get(SolrQueryRequest req) { - LOG.debug("## GET {}", req.toString()); + LOG.debug("## GET {}", req); return new LocalStatsSource(); } @@ -49,31 +49,33 @@ public class LocalStatsCache extends StatsCache { // by returning null we don't create additional round-trip request. 
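Aside on the stats-cache logging changes above (SOLR-10985): string concatenation inside debug statements is replaced with SLF4J placeholders, and a debug-only loop is wrapped in an explicit isDebugEnabled() check so it is skipped entirely at higher log levels. A generic sketch of the pattern; the class name and logged values are illustrative, not Solr's.

    import java.lang.invoke.MethodHandles;
    import java.util.Arrays;
    import java.util.List;

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class DebugLoggingSketch {
      private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

      static void logStats(Object termStats, Object terms, int numDocs, List<?> responses) {
        // {} placeholders: arguments are not rendered unless DEBUG is actually enabled
        LOG.debug("termStats={}, terms={}, numDocs={}", termStats, terms, numDocs);

        // when several statements exist only to produce debug output, guard the whole block
        if (LOG.isDebugEnabled()) {
          for (Object r : responses) {
            LOG.debug(" - {}", r);
          }
        }
      }

      public static void main(String[] args) {
        logStats("df=3", "title:foo", 100, Arrays.asList("shard1", "shard2"));
      }
    }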
@Override public ShardRequest retrieveStatsRequest(ResponseBuilder rb) { - LOG.debug("## RDR {}", rb.req.toString()); + LOG.debug("## RDR {}", rb.req); return null; } @Override public void mergeToGlobalStats(SolrQueryRequest req, List responses) { - LOG.debug("## MTGD {}", req.toString()); - for (ShardResponse r : responses) { - LOG.debug(" - {}", r); + if (LOG.isDebugEnabled()) { + LOG.debug("## MTGD {}", req); + for (ShardResponse r : responses) { + LOG.debug(" - {}", r); + } } } @Override public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) { - LOG.debug("## RLD {}", rb.req.toString()); + LOG.debug("## RLD {}", rb.req); } @Override public void receiveGlobalStats(SolrQueryRequest req) { - LOG.debug("## RGD {}", req.toString()); + LOG.debug("## RGD {}", req); } @Override public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) { - LOG.debug("## SGD {}", outgoing.toString()); + LOG.debug("## SGD {}", outgoing); } } diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 30c5681330f..657b402bed9 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -43,6 +43,7 @@ import java.time.Instant; import java.time.Period; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; import java.util.Collection; import java.util.Enumeration; import java.util.HashMap; @@ -115,6 +116,7 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; import org.apache.solr.security.Sha256AuthenticationProvider; import org.apache.solr.util.configuration.SSLConfigurationsFactory; import org.noggit.CharArr; @@ -3548,7 +3550,7 @@ public class SolrCLI { OptionBuilder .withArgName("type") .hasArg() - .withDescription("The authentication mechanism to enable. Defaults to 'basicAuth'.") + .withDescription("The authentication mechanism to enable (basicAuth or kerberos). Defaults to 'basicAuth'.") .create("type"), OptionBuilder .withArgName("credentials") @@ -3561,6 +3563,11 @@ public class SolrCLI { .withDescription("Prompts the user to provide the credentials. Use either -credentials or -prompt, not both") .create("prompt"), OptionBuilder + .withArgName("config") + .hasArgs() + .withDescription("Configuration parameters (Solr startup parameters). 
Required for Kerberos authentication") + .create("config"), + OptionBuilder .withArgName("blockUnknown") .withDescription("Blocks all access for unknown users (requires authentication for all endpoints)") .hasArg() @@ -3603,11 +3610,141 @@ public class SolrCLI { } String type = cli.getOptionValue("type", "basicAuth"); - if (type.equalsIgnoreCase("basicAuth") == false) { - System.out.println("Only type=basicAuth supported at the moment."); - exit(1); + switch (type) { + case "basicAuth": + return handleBasicAuth(cli); + case "kerberos": + return handleKerberos(cli); + default: + System.out.println("Only type=basicAuth or kerberos supported at the moment."); + exit(1); + } + return 1; + } + + private int handleKerberos(CommandLine cli) throws Exception { + String cmd = cli.getArgs()[0]; + boolean updateIncludeFileOnly = Boolean.parseBoolean(cli.getOptionValue("updateIncludeFileOnly", "false")); + String securityJson = "{" + + "\n \"authentication\":{" + + "\n \"class\":\"solr.KerberosPlugin\"" + + "\n }" + + "\n}"; + + + switch (cmd) { + case "enable": + String zkHost = null; + boolean zkInaccessible = false; + + if (!updateIncludeFileOnly) { + try { + zkHost = getZkHost(cli); + } catch (Exception ex) { + System.out.println("Unable to access ZooKeeper. Please add the following security.json to ZooKeeper (in case of SolrCloud):\n" + + securityJson + "\n"); + zkInaccessible = true; + } + if (zkHost == null) { + if (zkInaccessible == false) { + System.out.println("Unable to access ZooKeeper. Please add the following security.json to ZooKeeper (in case of SolrCloud):\n" + + securityJson + "\n"); + zkInaccessible = true; + } + } + + // check if security is already enabled or not + if (!zkInaccessible) { + try (SolrZkClient zkClient = new SolrZkClient(zkHost, 10000)) { + if (zkClient.exists("/security.json", true)) { + byte oldSecurityBytes[] = zkClient.getData("/security.json", null, null, true); + if (!"{}".equals(new String(oldSecurityBytes, StandardCharsets.UTF_8).trim())) { + System.out.println("Security is already enabled. You can disable it with 'bin/solr auth disable'. Existing security.json: \n" + + new String(oldSecurityBytes, StandardCharsets.UTF_8)); + exit(1); + } + } + } catch (Exception ex) { + if (zkInaccessible == false) { + System.out.println("Unable to access ZooKeeper. Please add the following security.json to ZooKeeper (in case of SolrCloud):\n" + + securityJson + "\n"); + zkInaccessible = true; + } + } + } + } + + if (!updateIncludeFileOnly) { + if (!zkInaccessible) { + System.out.println("Uploading following security.json: " + securityJson); + try (SolrZkClient zkClient = new SolrZkClient(zkHost, 10000)) { + zkClient.setData("/security.json", securityJson.getBytes(StandardCharsets.UTF_8), true); + } catch (Exception ex) { + if (zkInaccessible == false) { + System.out.println("Unable to access ZooKeeper. 
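Aside on the enable path above: before uploading the KerberosPlugin configuration, the tool reads any existing /security.json and aborts if it contains something other than the empty object that 'bin/solr auth disable' writes. A JDK-only sketch of that decision; treating a missing znode as "not enabled" is an added assumption here.

    import java.nio.charset.StandardCharsets;

    public class SecurityJsonCheck {
      // Returns true when /security.json already carries a real configuration.
      static boolean securityAlreadyEnabled(byte[] existingSecurityJson) {
        if (existingSecurityJson == null) {
          return false;                               // no znode content: nothing configured yet
        }
        String body = new String(existingSecurityJson, StandardCharsets.UTF_8).trim();
        return !body.isEmpty() && !"{}".equals(body); // "{}" is what `auth disable` leaves behind
      }

      public static void main(String[] args) {
        System.out.println(securityAlreadyEnabled(null));                                  // false
        System.out.println(securityAlreadyEnabled("{}".getBytes(StandardCharsets.UTF_8))); // false
        System.out.println(securityAlreadyEnabled(
            "{\"authentication\":{\"class\":\"solr.KerberosPlugin\"}}"
                .getBytes(StandardCharsets.UTF_8)));                                       // true
      }
    }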
Please add the following security.json to ZooKeeper (in case of SolrCloud):\n" + + securityJson); + zkInaccessible = true; + } + } + } + } + + String config = StrUtils.join(Arrays.asList(cli.getOptionValues("config")), ' '); + // config is base64 encoded (to get around parsing problems), decode it + config = config.replaceAll(" ", ""); + config = new String(Base64.getDecoder().decode(config.getBytes("UTF-8")), "UTF-8"); + config = config.replaceAll("\n", "").replaceAll("\r", ""); + + String solrIncludeFilename = cli.getOptionValue("solrIncludeFile"); + File includeFile = new File(solrIncludeFilename); + if (includeFile.exists() == false || includeFile.canWrite() == false) { + System.out.println("Solr include file " + solrIncludeFilename + " doesn't exist or is not writeable."); + printAuthEnablingInstructions(config); + System.exit(0); + } + + // update the solr.in.sh file to contain the necessary authentication lines + updateIncludeFileEnableAuth(includeFile, null, config); + System.out.println("Please restart any running Solr nodes."); + return 0; + + case "disable": + if (!updateIncludeFileOnly) { + zkHost = getZkHost(cli); + if (zkHost == null) { + stdout.print("ZK Host not found. Solr should be running in cloud mode"); + exit(1); + } + + System.out.println("Uploading following security.json: {}"); + + try (SolrZkClient zkClient = new SolrZkClient(zkHost, 10000)) { + zkClient.setData("/security.json", "{}".getBytes(StandardCharsets.UTF_8), true); + } + } + + solrIncludeFilename = cli.getOptionValue("solrIncludeFile"); + includeFile = new File(solrIncludeFilename); + if (!includeFile.exists() || !includeFile.canWrite()) { + System.out.println("Solr include file " + solrIncludeFilename + " doesn't exist or is not writeable."); + System.out.println("Security has been disabled. 
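Aside on the -config handling above: bin/solr base64-encodes the raw startup parameters so they survive shell quoting and CLI parsing, and SolrCLI strips the whitespace and decodes them before writing the include file. A round-trip sketch using only java.util.Base64; the Kerberos parameter values are illustrative assumptions, not taken from this patch.

    import java.nio.charset.StandardCharsets;
    import java.util.Base64;

    public class AuthConfigRoundTrip {
      public static void main(String[] args) {
        String rawConfig = "-Djava.security.auth.login.config=/etc/solr/jaas.conf "
            + "-Dsolr.kerberos.cookie.domain=solr.example.com";

        // roughly what `echo "$config" | base64` produces (the output may contain line breaks)
        String encoded = Base64.getMimeEncoder().encodeToString(rawConfig.getBytes(StandardCharsets.UTF_8));

        // SolrCLI side: drop whitespace introduced by the encoder and the CLI, then decode
        String cleaned = encoded.replaceAll("\\s", "");
        String decoded = new String(Base64.getDecoder().decode(cleaned), StandardCharsets.UTF_8);

        System.out.println(decoded.equals(rawConfig)); // true
      }
    }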
Please remove any SOLR_AUTH_TYPE or SOLR_AUTHENTICATION_OPTS configuration from solr.in.sh/solr.in.cmd.\n"); + System.exit(0); + } + + // update the solr.in.sh file to comment out the necessary authentication lines + updateIncludeFileDisableAuth(includeFile); + return 0; + + default: + System.out.println("Valid auth commands are: enable, disable"); + exit(1); } + System.out.println("Options not understood."); + new HelpFormatter().printHelp("bin/solr auth [OPTIONS]", getToolOptions(this)); + return 1; + } + private int handleBasicAuth(CommandLine cli) throws Exception { String cmd = cli.getArgs()[0]; boolean prompt = Boolean.parseBoolean(cli.getOptionValue("prompt", "false")); boolean updateIncludeFileOnly = Boolean.parseBoolean(cli.getOptionValue("updateIncludeFileOnly", "false")); @@ -3715,7 +3852,7 @@ public class SolrCLI { "httpBasicAuthUser=" + username + "\nhttpBasicAuthPassword=" + password, StandardCharsets.UTF_8); // update the solr.in.sh file to contain the necessary authentication lines - updateIncludeFileEnableAuth(includeFile, basicAuthConfFile.getAbsolutePath()); + updateIncludeFileEnableAuth(includeFile, basicAuthConfFile.getAbsolutePath(), null); return 0; case "disable": @@ -3754,7 +3891,6 @@ public class SolrCLI { new HelpFormatter().printHelp("bin/solr auth [OPTIONS]", getToolOptions(this)); return 1; } - private void printAuthEnablingInstructions(String username, String password) { if (SystemUtils.IS_OS_WINDOWS) { System.out.println("\nAdd the following lines to the solr.in.cmd file so that the solr.cmd script can use subsequently.\n"); @@ -3766,8 +3902,26 @@ public class SolrCLI { + "SOLR_AUTHENTICATION_OPTS=\"-Dbasicauth=" + username + ":" + password + "\"\n"); } } + private void printAuthEnablingInstructions(String kerberosConfig) { + if (SystemUtils.IS_OS_WINDOWS) { + System.out.println("\nAdd the following lines to the solr.in.cmd file so that the solr.cmd script can use subsequently.\n"); + System.out.println("set SOLR_AUTH_TYPE=kerberos\n" + + "set SOLR_AUTHENTICATION_OPTS=\"" + kerberosConfig + "\"\n"); + } else { + System.out.println("\nAdd the following lines to the solr.in.sh file so that the ./solr script can use subsequently.\n"); + System.out.println("SOLR_AUTH_TYPE=\"kerberos\"\n" + + "SOLR_AUTHENTICATION_OPTS=\"" + kerberosConfig + "\"\n"); + } + } - private void updateIncludeFileEnableAuth(File includeFile, String basicAuthConfFile) throws IOException { + /** + * This will update the include file (e.g. solr.in.sh / solr.in.cmd) with the authentication parameters. + * @param includeFile The include file + * @param basicAuthConfFile If basicAuth, the path of the file containing credentials. If not, null. + * @param kerberosConfig If kerberos, the config string containing startup parameters. If not, null. 
+ */ + private void updateIncludeFileEnableAuth(File includeFile, String basicAuthConfFile, String kerberosConfig) throws IOException { + assert !(basicAuthConfFile != null && kerberosConfig != null); // only one of the two needs to be populated List includeFileLines = FileUtils.readLines(includeFile, StandardCharsets.UTF_8); for (int i=0; i - - - - - + + + + + - + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml b/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml index dc23b84c20f..8db75b61469 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml @@ -17,9 +17,9 @@ --> - - - + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml b/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml index 23999e41b33..c8e89a127a0 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml @@ -17,8 +17,8 @@ --> - - + + diff --git a/solr/core/src/test-files/solr/configsets/_default/conf/elevate.xml b/solr/core/src/test-files/solr/configsets/_default/conf/elevate.xml deleted file mode 100644 index 2c09ebed669..00000000000 --- a/solr/core/src/test-files/solr/configsets/_default/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/solr/core/src/test-files/solr/configsets/_default/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/_default/conf/solrconfig.xml index f53636f474e..aa1ae698bd8 100644 --- a/solr/core/src/test-files/solr/configsets/_default/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/configsets/_default/conf/solrconfig.xml @@ -1004,7 +1004,6 @@ string - elevate.xml diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java index f2027b0abf7..77db071459a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java @@ -74,9 +74,10 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase { assertEquals(0, (int)status.get("status")); assertTrue(status.get("QTime") > 0); } + // Use of _default configset should generate a warning for data-driven functionality in production use + assertTrue(response.getWarning() != null && response.getWarning().contains("NOT RECOMMENDED for production use")); response = CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient()); - assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); Map> nodesStatus = response.getCollectionNodesStatus(); diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java index 367756316da..70c4e46c6b2 100644 --- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java @@ -54,7 +54,6 @@ public class MoveReplicaHDFSTest extends MoveReplicaTest { dfsCluster = null; } - public static class ForkJoinThreadsFilter implements ThreadFilter { @Override diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSUlogDirTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSUlogDirTest.java new file mode 100644 index 
00000000000..a27a39d3ca8 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSUlogDirTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud; + +import java.io.IOException; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; +import org.apache.solr.cloud.hdfs.HdfsTestUtil; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.ClusterStateUtil; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.ZkConfigManager; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.util.BadHdfsThreadsFilter; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +@ThreadLeakFilters(defaultFilters = true, filters = { + BadHdfsThreadsFilter.class, // hdfs currently leaks thread(s) + MoveReplicaHDFSTest.ForkJoinThreadsFilter.class +}) +public class MoveReplicaHDFSUlogDirTest extends SolrCloudTestCase { + private static MiniDFSCluster dfsCluster; + + @BeforeClass + public static void setupClass() throws Exception { + configureCluster(2) + .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-dynamic").resolve("conf")) + .configure(); + + System.setProperty("solr.hdfs.blockcache.enabled", "false"); + dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath()); + + ZkConfigManager configManager = new ZkConfigManager(zkClient()); + configManager.uploadConfigDir(configset("cloud-hdfs"), "conf1"); + + System.setProperty("solr.hdfs.home", HdfsTestUtil.getDataDir(dfsCluster, "data")); + } + + @AfterClass + public static void teardownClass() throws Exception { + cluster.shutdown(); // need to close before the MiniDFSCluster + HdfsTestUtil.teardownClass(dfsCluster); + dfsCluster = null; + } + + @Test + public void testDataDirAndUlogAreMaintained() throws Exception { + String coll = "movereplicatest_coll2"; + CollectionAdminRequest.createCollection(coll, "conf1", 1, 1) + .setCreateNodeSet("") + .process(cluster.getSolrClient()); + String hdfsUri = HdfsTestUtil.getURI(dfsCluster); + String dataDir = hdfsUri + "/dummyFolder/dataDir"; + String ulogDir = hdfsUri + "/dummyFolder2/ulogDir"; + CollectionAdminResponse res = CollectionAdminRequest + .addReplicaToShard(coll, "shard1") + .setDataDir(dataDir) + .setUlogDir(ulogDir) + 
.setNode(cluster.getJettySolrRunner(0).getNodeName()) + .process(cluster.getSolrClient()); + + ulogDir += "/tlog"; + ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader(); + assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000)); + + DocCollection docCollection = zkStateReader.getClusterState().getCollection(coll); + Replica replica = docCollection.getReplicas().iterator().next(); + assertTrue(replica.getStr("ulogDir"), replica.getStr("ulogDir").equals(ulogDir) || replica.getStr("ulogDir").equals(ulogDir+'/')); + assertTrue(replica.getStr("dataDir"),replica.getStr("dataDir").equals(dataDir) || replica.getStr("dataDir").equals(dataDir+'/')); + + new CollectionAdminRequest.MoveReplica(coll, replica.getName(), cluster.getJettySolrRunner(1).getNodeName()) + .process(cluster.getSolrClient()); + assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000)); + docCollection = zkStateReader.getClusterState().getCollection(coll); + assertEquals(1, docCollection.getSlice("shard1").getReplicas().size()); + Replica newReplica = docCollection.getReplicas().iterator().next(); + assertEquals(newReplica.getNodeName(), cluster.getJettySolrRunner(1).getNodeName()); + assertTrue(newReplica.getStr("ulogDir"), newReplica.getStr("ulogDir").equals(ulogDir) || newReplica.getStr("ulogDir").equals(ulogDir+'/')); + assertTrue(newReplica.getStr("dataDir"),newReplica.getStr("dataDir").equals(dataDir) || newReplica.getStr("dataDir").equals(dataDir+'/')); + + assertEquals(replica.getName(), newReplica.getName()); + assertEquals(replica.getCoreName(), newReplica.getCoreName()); + assertFalse(replica.getNodeName().equals(newReplica.getNodeName())); + final int numDocs = 100; + addDocs(coll, numDocs); // indexed but not committed + + cluster.getJettySolrRunner(1).stop(); + Thread.sleep(5000); + new CollectionAdminRequest.MoveReplica(coll, newReplica.getName(), cluster.getJettySolrRunner(0).getNodeName()) + .process(cluster.getSolrClient()); + assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000)); + + // assert that the old core will be removed on startup + cluster.getJettySolrRunner(1).start(); + assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000)); + docCollection = zkStateReader.getClusterState().getCollection(coll); + assertEquals(1, docCollection.getReplicas().size()); + newReplica = docCollection.getReplicas().iterator().next(); + assertEquals(newReplica.getNodeName(), cluster.getJettySolrRunner(0).getNodeName()); + assertTrue(newReplica.getStr("ulogDir"), newReplica.getStr("ulogDir").equals(ulogDir) || newReplica.getStr("ulogDir").equals(ulogDir+'/')); + assertTrue(newReplica.getStr("dataDir"),newReplica.getStr("dataDir").equals(dataDir) || newReplica.getStr("dataDir").equals(dataDir+'/')); + + assertEquals(0, cluster.getJettySolrRunner(1).getCoreContainer().getCores().size()); + + cluster.getSolrClient().commit(coll); + assertEquals(numDocs, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound()); + } + + private void addDocs(String collection, int numDocs) throws SolrServerException, IOException { + SolrClient solrClient = cluster.getSolrClient(); + for (int docId = 1; docId <= numDocs; docId++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", docId); + solrClient.add(collection, doc); + } + } + +} diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java 
b/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java index 6a22d9962f5..12b3ef0ff79 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java @@ -27,7 +27,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.TimeUnit; - +import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; @@ -52,6 +52,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") +@SuppressObjectReleaseTracker(bugUrl="Testing purposes") public class TestPullReplicaErrorHandling extends SolrCloudTestCase { private final static int REPLICATION_TIMEOUT_SECS = 10; diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java index 31b8b9c72e4..d3fc6794cf9 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java @@ -19,9 +19,12 @@ package org.apache.solr.cloud; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; + import com.codahale.metrics.Counter; import org.apache.lucene.util.TestUtil; @@ -41,7 +44,6 @@ import org.apache.solr.common.util.Utils; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.SolrCore; import org.apache.solr.metrics.SolrMetricManager; -import org.apache.solr.request.SolrRequestHandler; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -86,6 +88,25 @@ public class TestRandomRequestDistribution extends AbstractFullDistribZkTestBase cloudClient.getZkStateReader().forceUpdateCollection("b1x1"); + // get direct access to the metrics counters for each core/replica we're interested to monitor them + final Map counters = new LinkedHashMap<>(); + for (JettySolrRunner runner : jettys) { + CoreContainer container = runner.getCoreContainer(); + SolrMetricManager metricManager = container.getMetricManager(); + for (SolrCore core : container.getCores()) { + if ("a1x2".equals(core.getCoreDescriptor().getCollectionName())) { + String registry = core.getCoreMetricManager().getRegistryName(); + Counter cnt = metricManager.counter(null, registry, "requests", "QUERY./select"); + // sanity check + assertEquals(core.getName() + " has already recieved some requests?", + 0, cnt.getCount()); + counters.put(core.getName(), cnt); + } + } + } + assertEquals("Sanity Check: we know there should be 2 replicas", 2, counters.size()); + + // send queries to the node that doesn't host any core/replica and see where it routes them ClusterState clusterState = cloudClient.getZkStateReader().getClusterState(); DocCollection b1x1 = clusterState.getCollection("b1x1"); Collection replicas = b1x1.getSlice("shard1").getReplicas(); @@ -94,29 +115,30 @@ public class TestRandomRequestDistribution extends AbstractFullDistribZkTestBase if (!baseUrl.endsWith("/")) baseUrl += "/"; try (HttpSolrClient client = getHttpSolrClient(baseUrl + "a1x2", 2000, 5000)) { + long 
expectedTotalRequests = 0; + Set uniqueCoreNames = new LinkedHashSet<>(); + log.info("Making requests to " + baseUrl + "a1x2"); - for (int i = 0; i < 10; i++) { + while (uniqueCoreNames.size() < counters.keySet().size() && expectedTotalRequests < 1000L) { + expectedTotalRequests++; client.query(new SolrQuery("*:*")); + + long actualTotalRequests = 0; + for (Map.Entry e : counters.entrySet()) { + final long coreCount = e.getValue().getCount(); + actualTotalRequests += coreCount; + if (0 < coreCount) { + uniqueCoreNames.add(e.getKey()); + } + } + assertEquals("Sanity Check: Num Queries So Far Doesn't Match Total????", + expectedTotalRequests, actualTotalRequests); } - } - - Map shardVsCount = new HashMap<>(); - for (JettySolrRunner runner : jettys) { - CoreContainer container = runner.getCoreContainer(); - SolrMetricManager metricManager = container.getMetricManager(); - for (SolrCore core : container.getCores()) { - String registry = core.getCoreMetricManager().getRegistryName(); - Counter cnt = metricManager.counter(null, registry, "requests", "QUERY./select"); - SolrRequestHandler select = core.getRequestHandler(""); -// long c = (long) select.getStatistics().get("requests"); - shardVsCount.put(core.getName(), (int) cnt.getCount()); - } - } - - log.info("Shard count map = " + shardVsCount); - - for (Map.Entry entry : shardVsCount.entrySet()) { - assertTrue("Shard " + entry.getKey() + " received all 10 requests", entry.getValue() != 10); + log.info("Total requests: " + expectedTotalRequests); + assertEquals("either request randomization code is broken of this test seed is really unlucky, " + + "Gave up waiting for requests to hit every core at least once after " + + expectedTotalRequests + " requests", + uniqueCoreNames.size(), counters.size()); } } diff --git a/solr/core/src/test/org/apache/solr/schema/TestPointFields.java b/solr/core/src/test/org/apache/solr/schema/TestPointFields.java index a1d2260207d..4c8ff69b09d 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestPointFields.java +++ b/solr/core/src/test/org/apache/solr/schema/TestPointFields.java @@ -17,17 +17,24 @@ package org.apache.solr.schema; import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.RoundingMode; import java.text.SimpleDateFormat; import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -69,13 +76,13 @@ import org.junit.Test; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -/** - * Tests for PointField functionality - * - * - */ +/** Tests for PointField functionality */ public class TestPointFields extends SolrTestCaseJ4 { - + + // long overflow can occur in some date calculations if gaps are too large, so we limit to a million years BC & AD. 
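Aside on the TestRandomRequestDistribution rewrite above: instead of issuing exactly ten queries and asserting on the split, the test now keeps sending requests (capped at 1000) until every replica's request counter is non-zero, which removes the dependence on a lucky random distribution. A self-contained sketch of that loop, with plain in-memory counters standing in for the SolrMetricManager counters and a random pick standing in for the load-balanced client.

    import java.util.LinkedHashMap;
    import java.util.LinkedHashSet;
    import java.util.Map;
    import java.util.Random;
    import java.util.Set;
    import java.util.concurrent.atomic.LongAdder;

    public class UntilAllReplicasHit {
      public static void main(String[] args) {
        Map<String, LongAdder> counters = new LinkedHashMap<>();
        counters.put("core_node1", new LongAdder());
        counters.put("core_node2", new LongAdder());
        String[] names = counters.keySet().toArray(new String[0]);

        Random random = new Random();
        Set<String> seen = new LinkedHashSet<>();
        long sent = 0;

        while (seen.size() < counters.size() && sent < 1000L) {
          sent++;
          // stands in for client.query(...) being routed to some replica
          counters.get(names[random.nextInt(names.length)]).increment();
          counters.forEach((name, cnt) -> { if (cnt.sum() > 0) seen.add(name); });
        }

        if (seen.size() < counters.size()) {
          throw new AssertionError("gave up after " + sent + " requests without hitting every core");
        }
        System.out.println("every core hit at least once after " + sent + " requests");
      }
    }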
+ private static final long MIN_DATE_EPOCH_MILLIS = LocalDateTime.parse("-1000000-01-01T00:00:00").toInstant(ZoneOffset.ofHours(0)).toEpochMilli(); + private static final long MAX_DATE_EPOCH_MILLIS = LocalDateTime.parse("+1000000-01-01T00:00:00").toInstant(ZoneOffset.ofHours(0)).toEpochMilli(); + private static final String[] FIELD_SUFFIXES = new String[] { "", "_dv", "_mv", "_mv_dv", "_ni", "_ni_dv", "_ni_dv_ns", "_ni_dv_ns_mv", "_ni_mv", "_ni_mv_dv", "_ni_ns", "_ni_ns_mv", "_dv_ns", "_ni_ns_dv", "_dv_ns_mv", @@ -115,9 +122,11 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testIntPointFieldReturn() throws Exception { - testPointFieldReturn("number_p_i", "int", new String[]{"0", "-1", "2", "3", "43", "52", "-60", "74", "80", "99"}); - testPointFieldReturn("number_p_i_dv_ns", "int", new String[]{"0", "-1", "2", "3", "43", "52", "-60", "74", "80", "99"}); - testPointFieldReturn("number_p_i_ni", "int", new String[]{"0", "-1", "2", "3", "43", "52", "-60", "74", "80", "99"}); + int numValues = 10 * RANDOM_MULTIPLIER; + String[] ints = toStringArray(getRandomInts(numValues, false)); + testPointFieldReturn("number_p_i", "int", ints); + testPointFieldReturn("number_p_i_dv_ns", "int", ints); + testPointFieldReturn("number_p_i_ni", "int", ints); } @Test @@ -129,9 +138,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testIntPointFieldNonSearchableRangeQuery() throws Exception { - doTestPointFieldNonSearchableRangeQuery("number_p_i_ni", "42"); - doTestPointFieldNonSearchableRangeQuery("number_p_i_ni_ns", "42"); - doTestPointFieldNonSearchableRangeQuery("number_p_i_ni_ns_mv", "42", "666"); + doTestPointFieldNonSearchableRangeQuery("number_p_i_ni", toStringArray(getRandomInts(1, false))); + doTestPointFieldNonSearchableRangeQuery("number_p_i_ni_ns", toStringArray(getRandomInts(1, false))); + int numValues = 2 * RANDOM_MULTIPLIER; + doTestPointFieldNonSearchableRangeQuery("number_p_i_ni_ns_mv", toStringArray(getRandomInts(numValues, false))); } @Test @@ -162,8 +172,8 @@ public class TestPointFields extends SolrTestCaseJ4 { for (String r : Arrays.asList("*_p_i_ni", "*_p_i_ni_ns")) { assertTrue(r, regexToTest.remove(r)); String field = r.replace("*", "number"); - doTestPointFieldSortError(field, "w/o docValues", "42"); - doTestPointFieldFunctionQueryError(field, "w/o docValues", "42"); + doTestPointFieldSortError(field, "w/o docValues", toStringArray(getRandomInts(1, false))); + doTestPointFieldFunctionQueryError(field, "w/o docValues", toStringArray(getRandomInts(1, false))); } for (String r : Arrays.asList("*_p_i_mv", "*_p_i_ni_mv", "*_p_i_ni_mv_dv", "*_p_i_ni_dv_ns_mv", @@ -172,10 +182,11 @@ public class TestPointFields extends SolrTestCaseJ4 { "*_p_i_mv_sml", "*_p_i_mv_dv_sml", "*_p_i_ni_mv_dv_sml")) { assertTrue(r, regexToTest.remove(r)); String field = r.replace("*", "number"); - doTestPointFieldSortError(field, "multivalued", "42"); - doTestPointFieldSortError(field, "multivalued", "42", "666"); - doTestPointFieldFunctionQueryError(field, "multivalued", "42"); - doTestPointFieldFunctionQueryError(field, "multivalued", "42", "666"); + doTestPointFieldSortError(field, "multivalued", toStringArray(getRandomInts(1, false))); + int numValues = 2 * RANDOM_MULTIPLIER; + doTestPointFieldSortError(field, "multivalued", toStringArray(getRandomInts(numValues, false))); + doTestPointFieldFunctionQueryError(field, "multivalued", toStringArray(getRandomInts(1, false))); + doTestPointFieldFunctionQueryError(field, "multivalued", 
toStringArray(getRandomInts(numValues, false))); } assertEquals("Missing types in the test", Collections.emptySet(), regexToTest); @@ -184,68 +195,214 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testIntPointFieldFacetField() throws Exception { testPointFieldFacetField("number_p_i", "number_p_i_dv", getSequentialStringArrayWithInts(10)); + clearIndex(); + assertU(commit()); + testPointFieldFacetField("number_p_i", "number_p_i_dv", toStringArray(getRandomInts(10, false))); } @Test public void testIntPointFieldRangeFacet() throws Exception { - doTestIntPointFieldRangeFacet("number_p_i_dv", "number_p_i"); + String docValuesField = "number_p_i_dv"; + String nonDocValuesField = "number_p_i"; + int numValues = 10 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values; + List sortedValues; + int max; + do { + values = getRandomInts(numValues, false); + sortedValues = values.stream().sorted().collect(Collectors.toList()); + } while ((max = sortedValues.get(sortedValues.size() - 1)) >= Integer.MAX_VALUE - numValues); // leave room for rounding + int min = sortedValues.get(0); + int gap = (int)(((long)(max + numValues) - (long)min) / (long)numBuckets); + int[] bucketCount = new int[numBuckets]; + int bucketNum = 0; + int minBucketVal = min; + for (Integer value : sortedValues) { + while (((long)value - (long)minBucketVal) >= (long)gap) { + ++bucketNum; + minBucketVal += gap; + } + ++bucketCount[bucketNum]; + } + + for (int i = 0 ; i < numValues ; i++) { + assertU(adoc("id", String.valueOf(i), docValuesField, String.valueOf(values.get(i)), nonDocValuesField, String.valueOf(values.get(i)))); + } + assertU(commit()); + + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + testStrings[numBuckets] = "//*[@numFound='" + numValues + "']"; + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap)), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); + + assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + // Range Faceting with method = filter should work + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap), "facet.range.method", "filter"), + testStrings); + // this should actually use filter 
method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); } @Test public void testIntPointStats() throws Exception { - testPointStats("number_p_i", "number_p_i_dv", new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, - 0D, 9D, "10", "1", 0D); - testPointStats("number_p_i", "number_p_i_mv_dv", new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, - 0D, 9D, "10", "1", 0D); + int numValues = 10 * RANDOM_MULTIPLIER; + // don't produce numbers with exponents, since XPath comparison operators can't handle them + List values = getRandomInts(numValues, false, 9999999); + // System.err.println(Arrays.toString(values.toArray(new Integer[values.size()]))); + List sortedValues = values.stream().sorted().collect(Collectors.toList()); + double min = (double)sortedValues.get(0); + double max = (double)sortedValues.get(sortedValues.size() - 1); + + String[] valArray = toStringArray(values); + testPointStats("number_p_i", "number_p_i_dv", valArray, min, max, numValues, 1, 0D); + testPointStats("number_p_i", "number_p_i_mv_dv", valArray, min, max, numValues, 1, 0D); } @Test public void testIntPointFieldMultiValuedExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_i_mv", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedExactQuery("number_p_i_ni_mv_dv", getSequentialStringArrayWithInts(20)); + String[] ints = toStringArray(getRandomInts(20, false)); + testPointFieldMultiValuedExactQuery("number_p_i_mv", ints); + testPointFieldMultiValuedExactQuery("number_p_i_ni_mv_dv", ints); } @Test public void testIntPointFieldMultiValuedNonSearchableExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_i_ni_mv", getSequentialStringArrayWithInts(20), false); - testPointFieldMultiValuedExactQuery("number_p_i_ni_ns_mv", getSequentialStringArrayWithInts(20), false); + String[] ints = toStringArray(getRandomInts(20, false)); + testPointFieldMultiValuedExactQuery("number_p_i_ni_mv", ints, false); + testPointFieldMultiValuedExactQuery("number_p_i_ni_ns_mv", ints, false); } @Test public void testIntPointFieldMultiValuedReturn() throws Exception { - testPointFieldMultiValuedReturn("number_p_i_mv", "int", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedReturn("number_p_i_ni_mv_dv", "int", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedReturn("number_p_i_dv_ns_mv", "int", getSequentialStringArrayWithInts(20)); + String[] ints = toStringArray(getRandomInts(20, false)); + testPointFieldMultiValuedReturn("number_p_i_mv", "int", ints); + testPointFieldMultiValuedReturn("number_p_i_ni_mv_dv", "int", ints); + testPointFieldMultiValuedReturn("number_p_i_dv_ns_mv", "int", ints); } @Test public void testIntPointFieldMultiValuedRangeQuery() throws Exception { - testPointFieldMultiValuedRangeQuery("number_p_i_mv", "int", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedRangeQuery("number_p_i_ni_mv_dv", "int", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedRangeQuery("number_p_i_mv_dv", "int", getSequentialStringArrayWithInts(20)); + String[] ints = toStringArray(getRandomInts(20, false).stream().sorted().collect(Collectors.toList())); + testPointFieldMultiValuedRangeQuery("number_p_i_mv", "int", ints); + testPointFieldMultiValuedRangeQuery("number_p_i_ni_mv_dv", "int", 
ints); + testPointFieldMultiValuedRangeQuery("number_p_i_mv_dv", "int", ints); } @Test public void testIntPointFieldNotIndexed() throws Exception { - doTestFieldNotIndexed("number_p_i_ni", getSequentialStringArrayWithInts(10)); - doTestFieldNotIndexed("number_p_i_ni_mv", getSequentialStringArrayWithInts(10)); + String[] ints = toStringArray(getRandomInts(10, false)); + doTestFieldNotIndexed("number_p_i_ni", ints); + doTestFieldNotIndexed("number_p_i_ni_mv", ints); } //TODO MV SORT? @Test public void testIntPointFieldMultiValuedFacetField() throws Exception { testPointFieldMultiValuedFacetField("number_p_i_mv", "number_p_i_mv_dv", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedFacetField("number_p_i_mv", "number_p_i_mv_dv", toStringArray(getRandomInts(20, false))); + String[] randomSortedInts = toStringArray(getRandomInts(20, false).stream().sorted().collect(Collectors.toList())); + testPointFieldMultiValuedFacetField("number_p_i_mv", "number_p_i_mv_dv", randomSortedInts); } @Test public void testIntPointFieldMultiValuedRangeFacet() throws Exception { - doTestIntPointFieldMultiValuedRangeFacet("number_p_i_mv_dv", "number_p_i_mv"); + String docValuesField = "number_p_i_mv_dv"; + String nonDocValuesField = "number_p_i_mv"; + int numValues = 20 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values; + List> sortedValues; + int max; + do { + values = getRandomInts(numValues, false); + sortedValues = toAscendingPosVals(values, true); + } while ((max = sortedValues.get(sortedValues.size() - 1).val) >= Integer.MAX_VALUE - numValues); // leave room for rounding + int min = sortedValues.get(0).val; + int gap = (int)(((long)(max + numValues) - (long)min) / (long)numBuckets); + List> docIdBucket = new ArrayList<>(numBuckets); + for (int i = 0 ; i < numBuckets ; ++i) { + docIdBucket.add(new HashSet<>()); + } + int bucketNum = 0; + int minBucketVal = min; + for (PosVal value : sortedValues) { + while (value.val - minBucketVal >= gap) { + ++bucketNum; + minBucketVal += gap; + } + docIdBucket.get(bucketNum).add(value.pos / 2); // each doc gets two consecutive values + } + for (int i = 0 ; i < numValues ; i += 2) { + assertU(adoc("id", String.valueOf(i / 2), + docValuesField, String.valueOf(values.get(i)), + docValuesField, String.valueOf(values.get(i + 1)), + nonDocValuesField, String.valueOf(values.get(i)), + nonDocValuesField, String.valueOf(values.get(i + 1)))); + } + assertU(commit()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + minBucketVal = min; + testStrings[numBuckets] = "//*[@numFound='" + (numValues / 2) + "']"; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + docIdBucket.get(i).size() + "']"; + } + + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "indent", "on"), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv", "indent", "on"), + testStrings); 
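For reference, a minimal standalone sketch of the expected-count bookkeeping these range-facet tests repeat (walking sorted random values bucket by bucket before querying); the class and method names here are hypothetical and this is illustrative only, not part of the patch:

import java.util.Arrays;
import java.util.List;

public class RangeBucketSketch {
  // Count how many sorted values fall into each of numBuckets consecutive ranges of width `gap`,
  // starting at `min` -- the same walk the tests above use to precompute facet.range expectations.
  static int[] expectedBucketCounts(List<Integer> sortedValues, int min, int gap, int numBuckets) {
    int[] bucketCount = new int[numBuckets];
    int bucketNum = 0;
    long minBucketVal = min;               // long arithmetic avoids int overflow near Integer.MAX_VALUE
    for (int value : sortedValues) {
      while ((long) value - minBucketVal >= gap) {
        ++bucketNum;                       // advance to the bucket whose range contains `value`
        minBucketVal += gap;
      }
      ++bucketCount[bucketNum];
    }
    return bucketCount;
  }

  public static void main(String[] args) {
    // 6 values, 3 buckets of width 4 starting at 0 -> prints [2, 3, 1]
    System.out.println(Arrays.toString(
        expectedBucketCounts(Arrays.asList(0, 3, 4, 5, 7, 8), 0, 4, 3)));
  }
}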
+ + assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + docIdBucket.get(i).size() + "']"; + } + // Range Faceting with method = filter should work + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "facet.range.method", "filter", "indent", "on"), + testStrings); + // this should actually use filter method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv", "indent", "on"), + testStrings); } @Test - public void testIntPointMultiValuedFunctionQuery() throws Exception { + public void testIntPointMultiValuedFunctionQuery() throws Exception { testPointMultiValuedFunctionQuery("number_p_i_mv", "number_p_i_mv_dv", "int", getSequentialStringArrayWithInts(20)); + testPointMultiValuedFunctionQuery("number_p_i_mv", "number_p_i_mv_dv", "int", + toStringArray(getRandomInts(20, false).stream().sorted().collect(Collectors.toList()))); } @Test @@ -280,6 +437,9 @@ public class TestPointFields extends SolrTestCaseJ4 { this.pos = pos; this.val = val; } + public String toString() { + return "(" + pos + ": " + val.toString() + ")"; + } } /** Primary sort by value, with nulls either first or last as specified, and then secondary sort by position. 
*/ @@ -345,33 +505,29 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDoublePointFieldExactQuery() throws Exception { - doTestFloatPointFieldExactQuery("number_d"); - doTestFloatPointFieldExactQuery("number_p_d"); - doTestFloatPointFieldExactQuery("number_p_d_mv"); - doTestFloatPointFieldExactQuery("number_p_d_dv"); - doTestFloatPointFieldExactQuery("number_p_d_mv_dv"); - doTestFloatPointFieldExactQuery("number_p_d_ni_dv"); - doTestFloatPointFieldExactQuery("number_p_d_ni_ns_dv"); - doTestFloatPointFieldExactQuery("number_p_d_ni_dv_ns"); - doTestFloatPointFieldExactQuery("number_p_d_ni_mv_dv"); + doTestFloatPointFieldExactQuery("number_d", true); + doTestFloatPointFieldExactQuery("number_p_d", true); + doTestFloatPointFieldExactQuery("number_p_d_mv", true); + doTestFloatPointFieldExactQuery("number_p_d_dv", true); + doTestFloatPointFieldExactQuery("number_p_d_mv_dv", true); + doTestFloatPointFieldExactQuery("number_p_d_ni_dv", true); + doTestFloatPointFieldExactQuery("number_p_d_ni_ns_dv", true); + doTestFloatPointFieldExactQuery("number_p_d_ni_dv_ns", true); + doTestFloatPointFieldExactQuery("number_p_d_ni_mv_dv", true); } @Test public void testDoublePointFieldNonSearchableExactQuery() throws Exception { - doTestFloatPointFieldExactQuery("number_p_d_ni", false); - doTestFloatPointFieldExactQuery("number_p_d_ni_ns", false); + doTestFloatPointFieldExactQuery("number_p_d_ni", false, true); + doTestFloatPointFieldExactQuery("number_p_d_ni_ns", false, true); } @Test public void testDoublePointFieldReturn() throws Exception { - testPointFieldReturn("number_p_d", "double", new String[]{"0.0", "1.2", "2.5", "3.02", "0.43", "5.2", "6.01", "74.0", "80.0", "9.9"}); - testPointFieldReturn("number_p_d_dv_ns", "double", new String[]{"0.0", "1.2", "2.5", "3.02", "0.43", "5.2", "6.01", "74.0", "80.0", "9.9"}); - String[] arr = new String[atLeast(10)]; - for (int i = 0; i < arr.length; i++) { - double rand = random().nextDouble() * 10; - arr[i] = String.valueOf(rand); - } - testPointFieldReturn("number_p_d", "double", arr); + int numValues = 10 * RANDOM_MULTIPLIER; + String[] doubles = toStringArray(getRandomDoubles(numValues, false)); + testPointFieldReturn("number_p_d", "double", doubles); + testPointFieldReturn("number_p_d_dv_ns", "double", doubles); } @Test @@ -383,9 +539,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDoubleFieldNonSearchableRangeQuery() throws Exception { - doTestPointFieldNonSearchableRangeQuery("number_p_d_ni", "42.3"); - doTestPointFieldNonSearchableRangeQuery("number_p_d_ni_ns", "42.3"); - doTestPointFieldNonSearchableRangeQuery("number_p_d_ni_ns_mv", "42.3", "-66.6"); + doTestPointFieldNonSearchableRangeQuery("number_p_d_ni", toStringArray(getRandomDoubles(1, false))); + doTestPointFieldNonSearchableRangeQuery("number_p_d_ni_ns", toStringArray(getRandomDoubles(1, false))); + int numValues = 2 * RANDOM_MULTIPLIER; + doTestPointFieldNonSearchableRangeQuery("number_p_d_ni_ns_mv", toStringArray(getRandomDoubles(numValues, false))); } @@ -446,41 +603,121 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDoublePointFieldRangeFacet() throws Exception { - doTestFloatPointFieldRangeFacet("number_p_d_dv", "number_p_d"); + String docValuesField = "number_p_d_dv"; + String nonDocValuesField = "number_p_d"; + int numValues = 10 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values, sortedValues; + double min, max, gap, buffer; + do { + values = getRandomDoubles(numValues, false); + 
sortedValues = values.stream().sorted().collect(Collectors.toList()); + min = sortedValues.get(0); + max = sortedValues.get(sortedValues.size() - 1); + buffer = BigDecimal.valueOf(max).subtract(BigDecimal.valueOf(min)) + .divide(BigDecimal.valueOf(numValues), RoundingMode.HALF_UP) + .divide(BigDecimal.valueOf(2.0D), RoundingMode.HALF_UP).doubleValue(); + gap = BigDecimal.valueOf(max + buffer).subtract(BigDecimal.valueOf(min - buffer)) + .divide(BigDecimal.valueOf(numBuckets), RoundingMode.HALF_UP).doubleValue(); + } while (max >= Double.MAX_VALUE - buffer || min <= -Double.MAX_VALUE + buffer); + // System.err.println("min: " + min + " max: " + max + " gap: " + gap + " buffer: " + buffer); + int[] bucketCount = new int[numBuckets]; + int bucketNum = 0; + double minBucketVal = min - buffer; + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + minBucketVal); + for (double value : sortedValues) { + // System.err.println("value: " + value); + while (value - minBucketVal >= gap) { + ++bucketNum; + minBucketVal += gap; + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + minBucketVal); + } + ++bucketCount[bucketNum]; + } + + for (int i = 0 ; i < numValues ; i++) { + assertU(adoc("id", String.valueOf(i), + docValuesField, String.valueOf(values.get(i)), nonDocValuesField, String.valueOf(values.get(i)))); + } + assertU(commit()); + + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + testStrings[numBuckets] = "//*[@numFound='" + numValues + "']"; + minBucketVal = min - buffer; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap)), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); + + assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min - buffer; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + // Range Faceting with method = filter should work + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap), "facet.range.method", "filter"), + testStrings); + // this should actually use filter method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", 
String.valueOf(gap), "facet.range.method", "dv"), + testStrings); } @Test public void testDoublePointStats() throws Exception { - testPointStats("number_p_d", "number_p_d_dv", new String[]{"-10.0", "1.1", "2.2", "3.3", "4.4", "5.5", "6.6", "7.7", "8.8", "9.9"}, - -10.0D, 9.9D, "10", "1", 1E-10D); - testPointStats("number_p_d_mv", "number_p_d_mv_dv", new String[]{"-10.0", "1.1", "2.2", "3.3", "4.4", "5.5", "6.6", "7.7", "8.8", "9.9"}, - -10.0D, 9.9D, "10", "1", 1E-10D); + int numValues = 10 * RANDOM_MULTIPLIER; + // don't produce numbers with exponents, since XPath comparison operators can't handle them: 7 digits of precision + List values = getRandomInts(numValues, false, 9999999).stream() + .map(v -> (float)((double)v * Math.pow(10D, -1 * random().nextInt(8)))).collect(Collectors.toList()); + // System.err.println(Arrays.toString(values.toArray(new Float[values.size()]))); + List sortedValues = values.stream().sorted().collect(Collectors.toList()); + double min = (double)sortedValues.get(0); + double max = (double)sortedValues.get(sortedValues.size() - 1); + + String[] valArray = toStringArray(values); + testPointStats("number_p_d", "number_p_d_dv", valArray, min, max, numValues, 1, 1E-7D); + testPointStats("number_p_d", "number_p_d_mv_dv", valArray, min, max, numValues, 1, 1E-7D); } @Test public void testDoublePointFieldMultiValuedExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_d_mv", toStringArray(getRandomDoubles(20, false))); - testPointFieldMultiValuedExactQuery("number_p_d_ni_mv_dv", toStringArray(getRandomDoubles(20, false))); + String[] doubles = toStringArray(getRandomDoubles(20, false)); + testPointFieldMultiValuedExactQuery("number_p_d_mv", doubles); + testPointFieldMultiValuedExactQuery("number_p_d_ni_mv_dv", doubles); } @Test public void testDoublePointFieldMultiValuedNonSearchableExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_d_ni_mv", toStringArray(getRandomDoubles(20, false)), false); - testPointFieldMultiValuedExactQuery("number_p_d_ni_ns_mv", toStringArray(getRandomDoubles(20, false)), false); + String[] doubles = toStringArray(getRandomDoubles(20, false)); + testPointFieldMultiValuedExactQuery("number_p_d_ni_mv", doubles, false); + testPointFieldMultiValuedExactQuery("number_p_d_ni_ns_mv", doubles, false); } @Test public void testDoublePointFieldMultiValuedReturn() throws Exception { - testPointFieldMultiValuedReturn("number_p_d_mv", "double", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedReturn("number_p_d_ni_mv_dv", "double", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedReturn("number_p_d_dv_ns_mv", "double", getSequentialStringArrayWithDoubles(20)); + String[] doubles = toStringArray(getRandomDoubles(20, false)); + testPointFieldMultiValuedReturn("number_p_d_mv", "double", doubles); + testPointFieldMultiValuedReturn("number_p_d_ni_mv_dv", "double", doubles); + testPointFieldMultiValuedReturn("number_p_d_dv_ns_mv", "double", doubles); } @Test public void testDoublePointFieldMultiValuedRangeQuery() throws Exception { - testPointFieldMultiValuedRangeQuery("number_p_d_mv", "double", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedRangeQuery("number_p_d_ni_mv_dv", "double", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedRangeQuery("number_p_d_mv_dv", "double", getSequentialStringArrayWithDoubles(20)); + String[] doubles = toStringArray(getRandomDoubles(20, false).stream().sorted().collect(Collectors.toList())); + 
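As an aside, the buffer/gap arithmetic in testDoublePointFieldRangeFacet above can be read as the following small sketch (the helper and class names are hypothetical; illustrative only, not part of the patch):

import java.math.BigDecimal;
import java.math.RoundingMode;

public class DoubleGapSketch {
  // Given min/max of the random values, pad the facet range by half an "average slot"
  // (buffer) on each side, then split [min - buffer, max + buffer] into numBuckets buckets.
  // BigDecimal keeps the intermediate subtraction and division from losing precision.
  static double[] bufferAndGap(double min, double max, int numValues, int numBuckets) {
    double buffer = BigDecimal.valueOf(max).subtract(BigDecimal.valueOf(min))
        .divide(BigDecimal.valueOf(numValues), RoundingMode.HALF_UP)
        .divide(BigDecimal.valueOf(2.0D), RoundingMode.HALF_UP).doubleValue();
    double gap = BigDecimal.valueOf(max + buffer).subtract(BigDecimal.valueOf(min - buffer))
        .divide(BigDecimal.valueOf(numBuckets), RoundingMode.HALF_UP).doubleValue();
    return new double[] { buffer, gap };
  }

  public static void main(String[] args) {
    double[] bg = bufferAndGap(-10.0, 10.0, 10, 5);
    System.out.println("buffer=" + bg[0] + " gap=" + bg[1]); // buffer=1.0 gap=4.4
  }
}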
testPointFieldMultiValuedRangeQuery("number_p_d_mv", "double", doubles); + testPointFieldMultiValuedRangeQuery("number_p_d_ni_mv_dv", "double", doubles); + testPointFieldMultiValuedRangeQuery("number_p_d_mv_dv", "double", doubles); } @Test @@ -522,8 +759,9 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDoublePointFieldNotIndexed() throws Exception { - doTestFieldNotIndexed("number_p_d_ni", getSequentialStringArrayWithDoubles(10)); - doTestFieldNotIndexed("number_p_d_ni_mv", getSequentialStringArrayWithDoubles(10)); + String[] doubles = toStringArray(getRandomDoubles(10, false)); + doTestFieldNotIndexed("number_p_d_ni", doubles); + doTestFieldNotIndexed("number_p_d_ni_mv", doubles); } @@ -571,32 +809,28 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testFloatPointFieldExactQuery() throws Exception { - doTestFloatPointFieldExactQuery("number_p_f"); - doTestFloatPointFieldExactQuery("number_p_f_mv"); - doTestFloatPointFieldExactQuery("number_p_f_dv"); - doTestFloatPointFieldExactQuery("number_p_f_mv_dv"); - doTestFloatPointFieldExactQuery("number_p_f_ni_dv"); - doTestFloatPointFieldExactQuery("number_p_f_ni_ns_dv"); - doTestFloatPointFieldExactQuery("number_p_f_ni_dv_ns"); - doTestFloatPointFieldExactQuery("number_p_f_ni_mv_dv"); + doTestFloatPointFieldExactQuery("number_p_f", false); + doTestFloatPointFieldExactQuery("number_p_f_mv", false); + doTestFloatPointFieldExactQuery("number_p_f_dv", false); + doTestFloatPointFieldExactQuery("number_p_f_mv_dv", false); + doTestFloatPointFieldExactQuery("number_p_f_ni_dv", false); + doTestFloatPointFieldExactQuery("number_p_f_ni_ns_dv", false); + doTestFloatPointFieldExactQuery("number_p_f_ni_dv_ns", false); + doTestFloatPointFieldExactQuery("number_p_f_ni_mv_dv", false); } @Test public void testFloatPointFieldNonSearchableExactQuery() throws Exception { - doTestFloatPointFieldExactQuery("number_p_f_ni", false); - doTestFloatPointFieldExactQuery("number_p_f_ni_ns", false); + doTestFloatPointFieldExactQuery("number_p_f_ni", false, false); + doTestFloatPointFieldExactQuery("number_p_f_ni_ns", false, false); } @Test public void testFloatPointFieldReturn() throws Exception { - testPointFieldReturn("number_p_f", "float", new String[]{"0.0", "-1.2", "2.5", "3.02", "0.43", "5.2", "6.01", "74.0", "80.0", "9.9"}); - testPointFieldReturn("number_p_f_dv_ns", "float", new String[]{"0.0", "-1.2", "2.5", "3.02", "0.43", "5.2", "6.01", "74.0", "80.0", "9.9"}); - String[] arr = new String[atLeast(10)]; - for (int i = 0; i < arr.length; i++) { - float rand = random().nextFloat() * 10; - arr[i] = String.valueOf(rand); - } - testPointFieldReturn("number_p_f", "float", arr); + int numValues = 10 * RANDOM_MULTIPLIER; + String[] floats = toStringArray(getRandomFloats(numValues, false)); + testPointFieldReturn("number_p_f", "float", floats); + testPointFieldReturn("number_p_f_dv_ns", "float", floats); } @Test @@ -608,9 +842,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testFloatPointFieldNonSearchableRangeQuery() throws Exception { - doTestPointFieldNonSearchableRangeQuery("number_p_f_ni", "42.3"); - doTestPointFieldNonSearchableRangeQuery("number_p_f_ni_ns", "42.3"); - doTestPointFieldNonSearchableRangeQuery("number_p_f_ni_ns_mv", "42.3", "-66.6"); + doTestPointFieldNonSearchableRangeQuery("number_p_f_ni", toStringArray(getRandomFloats(1, false))); + doTestPointFieldNonSearchableRangeQuery("number_p_f_ni_ns", toStringArray(getRandomFloats(1, false))); + int numValues = 2 * 
RANDOM_MULTIPLIER; + doTestPointFieldNonSearchableRangeQuery("number_p_f_ni_ns_mv", toStringArray(getRandomFloats(numValues, false))); } @Test @@ -669,42 +904,120 @@ public class TestPointFields extends SolrTestCaseJ4 { } @Test + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11043") public void testFloatPointFieldRangeFacet() throws Exception { - doTestFloatPointFieldRangeFacet("number_p_f_dv", "number_p_f"); + String docValuesField = "number_p_f_dv"; + String nonDocValuesField = "number_p_f"; + int numValues = 10 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values, sortedValues; + float min, max, gap, buffer; + do { + values = getRandomFloats(numValues, false); + sortedValues = values.stream().sorted().collect(Collectors.toList()); + min = sortedValues.get(0); + max = sortedValues.get(sortedValues.size() - 1); + buffer = (float)(((double)max - (double)min) / (double)numValues / 2.0D); + gap = (float)(((double)max + (double)buffer - (double)min + (double)buffer) / (double)numBuckets); + } while (max >= Float.MAX_VALUE - buffer || min <= -Float.MAX_VALUE + buffer); + // System.err.println("min: " + min + " max: " + max + " gap: " + gap + " buffer: " + buffer); + int[] bucketCount = new int[numBuckets]; + int bucketNum = 0; + float minBucketVal = min - buffer; + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + minBucketVal); + for (float value : sortedValues) { + // System.err.println("value: " + value); + while (value - minBucketVal >= gap) { + ++bucketNum; + minBucketVal += gap; + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + minBucketVal); + } + ++bucketCount[bucketNum]; + } + + for (int i = 0 ; i < numValues ; i++) { + assertU(adoc("id", String.valueOf(i), + docValuesField, String.valueOf(values.get(i)), nonDocValuesField, String.valueOf(values.get(i)))); + } + assertU(commit()); + + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + testStrings[numBuckets] = "//*[@numFound='" + numValues + "']"; + minBucketVal = min - buffer; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap)), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); + + assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min - buffer; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + // Range Faceting with method = filter should work + 
assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap), "facet.range.method", "filter"), + testStrings); + // this should actually use filter method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min - buffer), + "facet.range.end", String.valueOf(max + buffer), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); } @Test public void testFloatPointStats() throws Exception { - testPointStats("number_p_f", "number_p_f_dv", new String[]{"-10.0", "1.1", "2.2", "3.3", "4.4", "5.5", "6.6", "7.7", "8.8", "9.9"}, - -10D, 9.9D, "10", "1", 1E-6D); - testPointStats("number_p_f_mv", "number_p_f_mv_dv", new String[]{"-10.0", "1.1", "2.2", "3.3", "4.4", "5.5", "6.6", "7.7", "8.8", "9.9"}, - -10D, 9.9D, "10", "1", 1E-6D); + int numValues = 10 * RANDOM_MULTIPLIER; + // don't produce numbers with exponents, since XPath comparison operators can't handle them: 7 digits of precision + List values = getRandomInts(numValues, false, 9999999).stream() + .map(v -> (float)((double)v * Math.pow(10D, -1 * random().nextInt(8)))).collect(Collectors.toList()); + // System.err.println(Arrays.toString(values.toArray(new Float[values.size()]))); + List sortedValues = values.stream().sorted().collect(Collectors.toList()); + double min = (double)sortedValues.get(0); + double max = (double)sortedValues.get(sortedValues.size() - 1); + + String[] valArray = toStringArray(values); + testPointStats("number_p_f", "number_p_f_dv", valArray, min, max, numValues, 1, 1E-7D); + testPointStats("number_p_f", "number_p_f_mv_dv", valArray, min, max, numValues, 1, 1E-7D); } @Test public void testFloatPointFieldMultiValuedExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_f_mv", toStringArray(getRandomFloats(20, false))); - testPointFieldMultiValuedExactQuery("number_p_f_ni_mv_dv", toStringArray(getRandomFloats(20, false))); + String[] floats = toStringArray(getRandomFloats(20, false)); + testPointFieldMultiValuedExactQuery("number_p_f_mv", floats); + testPointFieldMultiValuedExactQuery("number_p_f_ni_mv_dv", floats); } @Test public void testFloatPointFieldMultiValuedNonSearchableExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_f_ni_mv", toStringArray(getRandomFloats(20, false)), false); - testPointFieldMultiValuedExactQuery("number_p_f_ni_ns_mv", toStringArray(getRandomFloats(20, false)), false); + String[] floats = toStringArray(getRandomFloats(20, false)); + testPointFieldMultiValuedExactQuery("number_p_f_ni_mv", floats, false); + testPointFieldMultiValuedExactQuery("number_p_f_ni_ns_mv", floats, false); } @Test public void testFloatPointFieldMultiValuedReturn() throws Exception { - testPointFieldMultiValuedReturn("number_p_f_mv", "float", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedReturn("number_p_f_ni_mv_dv", "float", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedReturn("number_p_f_dv_ns_mv", "float", getSequentialStringArrayWithDoubles(20)); + String[] floats = toStringArray(getRandomFloats(20, false)); + testPointFieldMultiValuedReturn("number_p_f_mv", "float", floats); + testPointFieldMultiValuedReturn("number_p_f_ni_mv_dv", "float", floats); + testPointFieldMultiValuedReturn("number_p_f_dv_ns_mv", "float", floats); } @Test public void 
testFloatPointFieldMultiValuedRangeQuery() throws Exception { - testPointFieldMultiValuedRangeQuery("number_p_f_mv", "float", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedRangeQuery("number_p_f_ni_mv_dv", "float", getSequentialStringArrayWithDoubles(20)); - testPointFieldMultiValuedRangeQuery("number_p_f_mv_dv", "float", getSequentialStringArrayWithDoubles(20)); + String[] floats = toStringArray(getRandomFloats(20, false).stream().sorted().collect(Collectors.toList())); + testPointFieldMultiValuedRangeQuery("number_p_f_mv", "float", floats); + testPointFieldMultiValuedRangeQuery("number_p_f_ni_mv_dv", "float", floats); + testPointFieldMultiValuedRangeQuery("number_p_f_mv_dv", "float", floats); } @Test @@ -754,8 +1067,9 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testFloatPointFieldNotIndexed() throws Exception { - doTestFieldNotIndexed("number_p_f_ni", getSequentialStringArrayWithDoubles(10)); - doTestFieldNotIndexed("number_p_f_ni_mv", getSequentialStringArrayWithDoubles(10)); + String[] floats = toStringArray(getRandomFloats(10, false)); + doTestFieldNotIndexed("number_p_f_ni", floats); + doTestFieldNotIndexed("number_p_f_ni_mv", floats); } // Long @@ -780,8 +1094,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testLongPointFieldReturn() throws Exception { - testPointFieldReturn("number_p_l", "long", new String[]{"0", "-1", "2", "3", "43", "52", "-60", "74", "80", "99", String.valueOf(Long.MAX_VALUE)}); - testPointFieldReturn("number_p_l_dv_ns", "long", new String[]{"0", "-1", "2", "3", "43", "52", "-60", "74", "80", "99", String.valueOf(Long.MAX_VALUE)}); + int numValues = 10 * RANDOM_MULTIPLIER; + String[] longs = toStringArray(getRandomLongs(numValues, false)); + testPointFieldReturn("number_p_l", "long", longs); + testPointFieldReturn("number_p_l_dv_ns", "long", longs); } @Test @@ -793,9 +1109,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testLongPointFieldNonSearchableRangeQuery() throws Exception { - doTestPointFieldNonSearchableRangeQuery("number_p_l_ni", "3333333333"); - doTestPointFieldNonSearchableRangeQuery("number_p_l_ni_ns", "3333333333"); - doTestPointFieldNonSearchableRangeQuery("number_p_l_ni_ns_mv", "3333333333", "-4444444444"); + doTestPointFieldNonSearchableRangeQuery("number_p_l_ni", toStringArray(getRandomLongs(1, false))); + doTestPointFieldNonSearchableRangeQuery("number_p_l_ni_ns", toStringArray(getRandomLongs(1, false))); + int numValues = 2 * RANDOM_MULTIPLIER; + doTestPointFieldNonSearchableRangeQuery("number_p_l_ni_ns_mv", toStringArray(getRandomLongs(numValues, false))); } @Test @@ -828,8 +1145,8 @@ public class TestPointFields extends SolrTestCaseJ4 { for (String r : Arrays.asList("*_p_l_ni", "*_p_l_ni_ns")) { assertTrue(r, regexToTest.remove(r)); String field = r.replace("*", "number"); - doTestPointFieldSortError(field, "w/o docValues", "4234"); - doTestPointFieldFunctionQueryError(field, "w/o docValues", "4234"); + doTestPointFieldSortError(field, "w/o docValues", toStringArray(getRandomLongs(1, false))); + doTestPointFieldFunctionQueryError(field, "w/o docValues", toStringArray(getRandomLongs(1, false))); } for (String r : Arrays.asList("*_p_l_mv", "*_p_l_ni_mv", "*_p_l_ni_mv_dv", "*_p_l_ni_dv_ns_mv", @@ -838,10 +1155,11 @@ public class TestPointFields extends SolrTestCaseJ4 { "*_p_l_mv_sml", "*_p_l_mv_dv_sml", "*_p_l_ni_mv_dv_sml")) { assertTrue(r, regexToTest.remove(r)); String field = r.replace("*", "number"); - 
doTestPointFieldSortError(field, "multivalued", "4234"); - doTestPointFieldSortError(field, "multivalued", "4234", "66666666"); - doTestPointFieldFunctionQueryError(field, "multivalued", "4234"); - doTestPointFieldFunctionQueryError(field, "multivalued", "4234", "66666666"); + doTestPointFieldSortError(field, "multivalued", toStringArray(getRandomLongs(1, false))); + int numValues = 2 * RANDOM_MULTIPLIER; + doTestPointFieldSortError(field, "multivalued", toStringArray(getRandomLongs(numValues, false))); + doTestPointFieldFunctionQueryError(field, "multivalued", toStringArray(getRandomLongs(1, false))); + doTestPointFieldFunctionQueryError(field, "multivalued", toStringArray(getRandomLongs(numValues, false))); } assertEquals("Missing types in the test", Collections.emptySet(), regexToTest); @@ -857,41 +1175,116 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testLongPointFieldRangeFacet() throws Exception { - doTestIntPointFieldRangeFacet("number_p_l_dv", "number_p_l"); + String docValuesField = "number_p_l_dv"; + String nonDocValuesField = "number_p_l"; + int numValues = 10 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values; + List sortedValues; + long max; + do { + values = getRandomLongs(numValues, false); + sortedValues = values.stream().sorted().collect(Collectors.toList()); + } while ((max = sortedValues.get(sortedValues.size() - 1)) >= Long.MAX_VALUE - numValues); // leave room for rounding + long min = sortedValues.get(0); + BigInteger bigIntGap = BigInteger.valueOf(max + numValues).subtract(BigInteger.valueOf(min)) + .divide(BigInteger.valueOf(numBuckets)); + long gap = bigIntGap.longValueExact(); + int[] bucketCount = new int[numBuckets]; + int bucketNum = 0; + long minBucketVal = min; + // System.err.println("min:" + min + " max: " + max + " gap: " + gap); + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + minBucketVal); + for (Long value : sortedValues) { + // System.err.println("value: " + value); + while (BigInteger.valueOf(value).subtract(BigInteger.valueOf(minBucketVal)).compareTo(bigIntGap) > 0) { + ++bucketNum; + minBucketVal += gap; + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + minBucketVal); + } + ++bucketCount[bucketNum]; + } + + for (int i = 0 ; i < numValues ; i++) { + assertU(adoc("id", String.valueOf(i), docValuesField, String.valueOf(values.get(i)), nonDocValuesField, String.valueOf(values.get(i)))); + } + assertU(commit()); + + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + testStrings[numBuckets] = "//*[@numFound='" + numValues + "']"; + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap)), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); + + 
assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + bucketCount[i] + "']"; + } + // Range Faceting with method = filter should work + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap), "facet.range.method", "filter"), + testStrings); + // this should actually use filter method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", String.valueOf(min), + "facet.range.end", String.valueOf(max), "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv"), + testStrings); } @Test public void testLongPointStats() throws Exception { - testPointStats("number_p_l", "number_p_l_dv", new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, - 0D, 9D, "10", "1", 0D); - testPointStats("number_p_l_mv", "number_p_l_mv_dv", new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, - 0D, 9D, "10", "1", 0D); + int numValues = 10 * RANDOM_MULTIPLIER; + // don't produce numbers with exponents, since XPath comparison operators can't handle them + List values = getRandomLongs(numValues, false, 9999999L); + List sortedValues = values.stream().sorted().collect(Collectors.toList()); + double min = (double)sortedValues.get(0); + double max = (double)sortedValues.get(sortedValues.size() - 1); + + String[] valArray = toStringArray(values); + testPointStats("number_p_l", "number_p_l_dv", valArray, min, max, numValues, 1, 0D); + testPointStats("number_p_l", "number_p_l_mv_dv", valArray, min, max, numValues, 1, 0D); } @Test public void testLongPointFieldMultiValuedExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_l_mv", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedExactQuery("number_p_l_ni_mv_dv", getSequentialStringArrayWithInts(20)); + String[] ints = toStringArray(getRandomInts(20, false)); + testPointFieldMultiValuedExactQuery("number_p_l_mv", ints); + testPointFieldMultiValuedExactQuery("number_p_l_ni_mv_dv", ints); } @Test public void testLongPointFieldMultiValuedNonSearchableExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_l_ni_mv", getSequentialStringArrayWithInts(20), false); - testPointFieldMultiValuedExactQuery("number_p_l_ni_ns_mv", getSequentialStringArrayWithInts(20), false); + String[] longs = toStringArray(getRandomLongs(20, false)); + testPointFieldMultiValuedExactQuery("number_p_l_ni_mv", longs, false); + testPointFieldMultiValuedExactQuery("number_p_l_ni_ns_mv", longs, false); } @Test public void testLongPointFieldMultiValuedReturn() throws Exception { - testPointFieldMultiValuedReturn("number_p_l_mv", "long", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedReturn("number_p_l_ni_mv_dv", "long", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedReturn("number_p_l_dv_ns_mv", "long", getSequentialStringArrayWithInts(20)); + String[] longs = toStringArray(getRandomLongs(20, false)); + testPointFieldMultiValuedReturn("number_p_l_mv", "long", longs); + 
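For reference, a hedged sketch of why testLongPointFieldRangeFacet above routes the gap computation through BigInteger: (max + numValues) - min can exceed Long.MAX_VALUE when min is very negative, so the subtraction and division are done in arbitrary precision and only the final per-bucket gap (which does fit) comes back as a long. Class and method names here are hypothetical, not part of the patch:

import java.math.BigInteger;

public class LongGapSketch {
  // Split [min, max + numValues] into numBuckets buckets without overflowing long arithmetic:
  // the full numerator is built in BigInteger, and only the per-bucket gap is narrowed back.
  static long bucketGap(long min, long max, int numValues, int numBuckets) {
    return BigInteger.valueOf(max).add(BigInteger.valueOf(numValues))
        .subtract(BigInteger.valueOf(min))
        .divide(BigInteger.valueOf(numBuckets))
        .longValueExact();
  }

  public static void main(String[] args) {
    // Computing (max + numValues) - min directly in long would overflow here; this path does not.
    System.out.println(bucketGap(Long.MIN_VALUE / 2, Long.MAX_VALUE / 2, 10, 5));
  }
}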
testPointFieldMultiValuedReturn("number_p_l_ni_mv_dv", "long", longs); + testPointFieldMultiValuedReturn("number_p_l_dv_ns_mv", "long", longs); } @Test public void testLongPointFieldMultiValuedRangeQuery() throws Exception { - testPointFieldMultiValuedRangeQuery("number_p_l_mv", "long", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedRangeQuery("number_p_l_ni_mv_dv", "long", getSequentialStringArrayWithInts(20)); - testPointFieldMultiValuedRangeQuery("number_p_l_mv_dv", "long", getSequentialStringArrayWithInts(20)); + String[] longs = toStringArray(getRandomLongs(20, false).stream().sorted().collect(Collectors.toList())); + testPointFieldMultiValuedRangeQuery("number_p_l_mv", "long", longs); + testPointFieldMultiValuedRangeQuery("number_p_l_ni_mv_dv", "long", longs); + testPointFieldMultiValuedRangeQuery("number_p_l_mv_dv", "long", longs); } @Test @@ -902,7 +1295,77 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testLongPointFieldMultiValuedRangeFacet() throws Exception { - doTestIntPointFieldMultiValuedRangeFacet("number_p_l_mv_dv", "number_p_l_mv"); + String docValuesField = "number_p_l_mv_dv"; + String nonDocValuesField = "number_p_l_mv"; + int numValues = 20 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values; + List> sortedValues; + long max; + do { + values = getRandomLongs(numValues, false); + sortedValues = toAscendingPosVals(values, true); + } while ((max = sortedValues.get(sortedValues.size() - 1).val) >= Long.MAX_VALUE - numValues); // leave room for rounding + long min = sortedValues.get(0).val; + long gap = BigInteger.valueOf(max + numValues).subtract(BigInteger.valueOf(min)) + .divide(BigInteger.valueOf(numBuckets)).longValueExact(); + List> docIdBucket = new ArrayList<>(numBuckets); + for (int i = 0 ; i < numBuckets ; ++i) { + docIdBucket.add(new HashSet<>()); + } + int bucketNum = 0; + long minBucketVal = min; + for (PosVal value : sortedValues) { + while (value.val - minBucketVal >= gap) { + ++bucketNum; + minBucketVal += gap; + } + docIdBucket.get(bucketNum).add(value.pos / 2); // each doc gets two consecutive values + } + for (int i = 0 ; i < numValues ; i += 2) { + assertU(adoc("id", String.valueOf(i / 2), + docValuesField, String.valueOf(values.get(i)), + docValuesField, String.valueOf(values.get(i + 1)), + nonDocValuesField, String.valueOf(values.get(i)), + nonDocValuesField, String.valueOf(values.get(i + 1)))); + } + assertU(commit()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + testStrings[numBuckets] = "//*[@numFound='" + (numValues / 2) + "']"; + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + docIdBucket.get(i).size() + "']"; + } + + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "indent", "on"), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv", "indent", "on"), + 
testStrings); + + assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; minBucketVal += gap, ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + minBucketVal + "'][.='" + docIdBucket.get(i).size() + "']"; + } + // Range Faceting with method = filter should work + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "facet.range.method", "filter", "indent", "on"), + testStrings); + // this should actually use filter method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, + "facet.range.start", String.valueOf(min), "facet.range.end", String.valueOf(max), + "facet.range.gap", String.valueOf(gap), "facet.range.method", "dv", "indent", "on"), + testStrings); } @Test @@ -939,8 +1402,9 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testLongPointFieldNotIndexed() throws Exception { - doTestFieldNotIndexed("number_p_l_ni", getSequentialStringArrayWithInts(10)); - doTestFieldNotIndexed("number_p_l_ni_mv", getSequentialStringArrayWithInts(10)); + String[] longs = toStringArray(getRandomLongs(10, false)); + doTestFieldNotIndexed("number_p_l_ni", longs); + doTestFieldNotIndexed("number_p_l_ni_mv", longs); } // Date @@ -964,12 +1428,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDatePointFieldReturn() throws Exception { - testPointFieldReturn("number_p_dt", "date", - new String[]{"1995-12-31T23:59:59Z", "1994-02-28T23:59:59Z", - "2015-12-31T23:59:59Z", "2000-10-31T23:59:59Z", "1999-12-31T12:59:59Z"}); - testPointFieldReturn("number_p_dt_dv_ns", "date", - new String[]{"1995-12-31T23:59:59Z", "1994-02-28T23:59:59Z", - "2015-12-31T23:59:59Z", "2000-10-31T23:59:59Z", "1999-12-31T12:59:59Z"}); + int numValues = 10 * RANDOM_MULTIPLIER; + String[] dates = toStringArray(getRandomInstants(numValues, false)); + testPointFieldReturn("number_p_dt", "date", dates); + testPointFieldReturn("number_p_dt_dv_ns", "date", dates); } @Test @@ -980,9 +1442,10 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDatePointFieldNonSearchableRangeQuery() throws Exception { - doTestPointFieldNonSearchableRangeQuery("number_p_dt_ni", "1995-12-31T23:59:59Z"); - doTestPointFieldNonSearchableRangeQuery("number_p_dt_ni_ns", "1995-12-31T23:59:59Z"); - doTestPointFieldNonSearchableRangeQuery("number_p_dt_ni_ns_mv", "1995-12-31T23:59:59Z", "2000-10-31T23:59:59Z"); + doTestPointFieldNonSearchableRangeQuery("number_p_dt_ni", toStringArray(getRandomInstants(1, false))); + doTestPointFieldNonSearchableRangeQuery("number_p_dt_ni_ns", toStringArray(getRandomInstants(1, false))); + int numValues = 2 * RANDOM_MULTIPLIER; + doTestPointFieldNonSearchableRangeQuery("number_p_dt_ni_ns_mv", toStringArray(getRandomInstants(numValues, false))); } @Test @@ -1037,12 +1500,147 @@ public class TestPointFields extends SolrTestCaseJ4 { testPointFieldFacetField("number_p_dt", "number_p_dt_dv", getSequentialStringArrayWithDates(10)); clearIndex(); assertU(commit()); - testPointFieldFacetField("number_p_dt", "number_p_dt_dv", getSequentialStringArrayWithDates(10)); + 
testPointFieldFacetField("number_p_dt", "number_p_dt_dv", toStringArray(getRandomInstants(10, false))); } + private static class DateGapCeiling { + String calendarUnit = "MILLIS"; + long inCalendarUnits; + boolean negative = false; + + /** Maximize calendar unit size given initialGapMillis; performs ceiling on each conversion */ + DateGapCeiling(long initialGapMillis) { + negative = initialGapMillis < 0; + inCalendarUnits = Math.abs(initialGapMillis); + if (inCalendarUnits >= 1000L) { + calendarUnit = "SECS"; + inCalendarUnits = (inCalendarUnits + 999L) / 1000L; + if (inCalendarUnits >= 60L) { + calendarUnit = "MINUTES"; + inCalendarUnits = (inCalendarUnits + 59L) / 60L; + if (inCalendarUnits >= 60L) { + calendarUnit = "HOURS"; + inCalendarUnits = (inCalendarUnits + 59L) / 60L; + if (inCalendarUnits >= 24L) { + calendarUnit = "DAYS"; + inCalendarUnits = (inCalendarUnits + 23L) / 24L; + if (inCalendarUnits >= 12L) { + calendarUnit = "MONTHS"; + inCalendarUnits = (inCalendarUnits + 11L) / 12L; + if ((inCalendarUnits * 16) >= 487) { // 487 = 365.25 / 12 * 16 (365.25 days/year, -ish) + calendarUnit = "YEARS"; + inCalendarUnits = (16L * inCalendarUnits + 486) / 487L; + } + } + } + } + } + } + } + @Override + public String toString() { + return (negative ? "-" : "+") + inCalendarUnits + calendarUnit; + } + + public long addTo(long millis) { // Instant.plus() doesn't work with estimated durations (MONTHS and YEARS) + LocalDateTime time = LocalDateTime.ofInstant(Instant.ofEpochMilli(millis), ZoneOffset.ofHours(0)); + if (negative) { + time = time.minus(inCalendarUnits, DateMathParser.CALENDAR_UNITS.get(calendarUnit)); + } else { + time = time.plus(inCalendarUnits, DateMathParser.CALENDAR_UNITS.get(calendarUnit)); + } + return time.atZone(ZoneOffset.ofHours(0)).toInstant().toEpochMilli(); + } + } + @Test public void testDatePointFieldRangeFacet() throws Exception { - doTestDatePointFieldRangeFacet("number_p_dt_dv", "number_p_dt"); + String docValuesField = "number_p_dt_dv"; + String nonDocValuesField = "number_p_dt"; + int numValues = 10 * RANDOM_MULTIPLIER; + int numBuckets = numValues / 2; + List values, sortedValues; + long min, max; + DateGapCeiling gap; + do { + values = getRandomLongs(numValues, false, MAX_DATE_EPOCH_MILLIS); + sortedValues = values.stream().sorted().collect(Collectors.toList()); + min = sortedValues.get(0); + max = sortedValues.get(sortedValues.size() - 1); + } while (max > MAX_DATE_EPOCH_MILLIS || min <= MIN_DATE_EPOCH_MILLIS); + long initialGap = BigInteger.valueOf(max).subtract(BigInteger.valueOf(min)) + .divide(BigInteger.valueOf(numBuckets)).longValueExact(); + gap = new DateGapCeiling(BigInteger.valueOf(max + initialGap).subtract(BigInteger.valueOf(min)) // padding for rounding + .divide(BigInteger.valueOf(numBuckets)).longValueExact()); + int[] bucketCount = new int[numBuckets]; + int bucketNum = 0; + long minBucketVal = min; + // System.err.println("min:" + Instant.ofEpochMilli(min) + " max: " + Instant.ofEpochMilli(max) + " gap: " + gap); + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + Instant.ofEpochMilli(minBucketVal)); + for (long value : sortedValues) { + // System.err.println("value: " + Instant.ofEpochMilli(value)); + while (value >= gap.addTo(minBucketVal)) { + ++bucketNum; + minBucketVal = gap.addTo(minBucketVal); + // System.err.println("bucketNum: " + bucketNum + " minBucketVal: " + Instant.ofEpochMilli(minBucketVal)); + } + ++bucketCount[bucketNum]; + } + + for (int i = 0 ; i < numValues ; i++) { + assertU(adoc("id", 
String.valueOf(i), docValuesField, Instant.ofEpochMilli(values.get(i)).toString(), + nonDocValuesField, Instant.ofEpochMilli(values.get(i)).toString())); + } + assertU(commit()); + + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); + String[] testStrings = new String[numBuckets + 1]; + testStrings[numBuckets] = "//*[@numFound='" + numValues + "']"; + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + + "']/lst[@name='counts']/int[@name='" + Instant.ofEpochMilli(minBucketVal).toString() + + "'][.='" + bucketCount[i] + "']"; + minBucketVal = gap.addTo(minBucketVal); + } + long maxPlusGap = gap.addTo(max); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, + "facet.range.start", Instant.ofEpochMilli(min).toString(), + "facet.range.end", Instant.ofEpochMilli(maxPlusGap).toString(), + "facet.range.gap", gap.toString()), + testStrings); + assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, + "facet.range.start", Instant.ofEpochMilli(min).toString(), + "facet.range.end", Instant.ofEpochMilli(maxPlusGap).toString(), + "facet.range.gap", gap.toString(), + "facet.range.method", "dv"), + testStrings); + + assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); + assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); + minBucketVal = min; + for (int i = 0 ; i < numBuckets ; ++i) { + testStrings[i] = "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + + "']/lst[@name='counts']/int[@name='" + Instant.ofEpochMilli(minBucketVal).toString() + + "'][.='" + bucketCount[i] + "']"; + minBucketVal = gap.addTo(minBucketVal); + } + maxPlusGap = gap.addTo(max); + // Range Faceting with method = filter should work + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, + "facet.range.start", Instant.ofEpochMilli(min).toString(), + "facet.range.end", Instant.ofEpochMilli(maxPlusGap).toString(), + "facet.range.gap", gap.toString(), + "facet.range.method", "filter"), + testStrings); + // this should actually use filter method instead of dv + assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, + "facet.range.start", Instant.ofEpochMilli(min).toString(), + "facet.range.end", Instant.ofEpochMilli(maxPlusGap).toString(), + "facet.range.gap", gap.toString(), + "facet.range.method", "dv"), + testStrings); } @Test @@ -1053,27 +1651,31 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDatePointFieldMultiValuedExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_dt_mv", getSequentialStringArrayWithDates(20)); - testPointFieldMultiValuedExactQuery("number_p_dt_ni_mv_dv", getSequentialStringArrayWithDates(20)); + String[] dates = toStringArray(getRandomInstants(20, false)); + testPointFieldMultiValuedExactQuery("number_p_dt_mv", dates); + testPointFieldMultiValuedExactQuery("number_p_dt_ni_mv_dv", dates); } @Test public void testDatePointFieldMultiValuedNonSearchableExactQuery() throws Exception { - testPointFieldMultiValuedExactQuery("number_p_dt_ni_mv", getSequentialStringArrayWithDates(20), false); - testPointFieldMultiValuedExactQuery("number_p_dt_ni_ns_mv", getSequentialStringArrayWithDates(20), false); + String[] dates 
= toStringArray(getRandomInstants(20, false)); + testPointFieldMultiValuedExactQuery("number_p_dt_ni_mv", dates, false); + testPointFieldMultiValuedExactQuery("number_p_dt_ni_ns_mv", dates, false); } @Test public void testDatePointFieldMultiValuedReturn() throws Exception { - testPointFieldMultiValuedReturn("number_p_dt_mv", "date", getSequentialStringArrayWithDates(20)); - testPointFieldMultiValuedReturn("number_p_dt_ni_mv_dv", "date", getSequentialStringArrayWithDates(20)); - testPointFieldMultiValuedReturn("number_p_dt_dv_ns_mv", "date", getSequentialStringArrayWithDates(20)); + String[] dates = toStringArray(getRandomInstants(20, false)); + testPointFieldMultiValuedReturn("number_p_dt_mv", "date", dates); + testPointFieldMultiValuedReturn("number_p_dt_ni_mv_dv", "date", dates); + testPointFieldMultiValuedReturn("number_p_dt_dv_ns_mv", "date", dates); } @Test public void testDatePointFieldMultiValuedRangeQuery() throws Exception { - testPointFieldMultiValuedRangeQuery("number_p_dt_mv", "date", getSequentialStringArrayWithDates(20)); - testPointFieldMultiValuedRangeQuery("number_p_dt_ni_mv_dv", "date", getSequentialStringArrayWithDates(20)); + String[] dates = toStringArray(getRandomInstants(20, false).stream().sorted().collect(Collectors.toList())); + testPointFieldMultiValuedRangeQuery("number_p_dt_mv", "date", dates); + testPointFieldMultiValuedRangeQuery("number_p_dt_ni_mv_dv", "date", dates); } @Test @@ -1089,7 +1691,8 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDatePointMultiValuedFunctionQuery() throws Exception { - testPointMultiValuedFunctionQuery("number_p_dt_mv", "number_p_dt_mv_dv", "date", getSequentialStringArrayWithDates(20)); + String[] dates = toStringArray(getRandomInstants(20, false).stream().sorted().collect(Collectors.toList())); + testPointMultiValuedFunctionQuery("number_p_dt_mv", "number_p_dt_mv_dv", "date", dates); } @Test @@ -1097,9 +1700,9 @@ public class TestPointFields extends SolrTestCaseJ4 { if (!Boolean.getBoolean("enable.update.log")) { return; } - testDatePointFieldsAtomicUpdates("number_p_dt", "date"); - testDatePointFieldsAtomicUpdates("number_p_dt_dv", "date"); - testDatePointFieldsAtomicUpdates("number_p_dt_dv_ns", "date"); + testDatePointFieldsAtomicUpdates("number_p_dt"); + testDatePointFieldsAtomicUpdates("number_p_dt_dv"); + testDatePointFieldsAtomicUpdates("number_p_dt_dv_ns"); } @Test @@ -1107,9 +1710,9 @@ public class TestPointFields extends SolrTestCaseJ4 { if (!Boolean.getBoolean("enable.update.log")) { return; } - testMultiValuedDatePointFieldsAtomicUpdates("number_p_dt_mv", "date"); - testMultiValuedDatePointFieldsAtomicUpdates("number_p_dt_ni_mv_dv", "date"); - testMultiValuedDatePointFieldsAtomicUpdates("number_p_dt_dv_ns_mv", "date"); + testMultiValuedDatePointFieldsAtomicUpdates("number_p_dt_mv"); + testMultiValuedDatePointFieldsAtomicUpdates("number_p_dt_ni_mv_dv"); + testMultiValuedDatePointFieldsAtomicUpdates("number_p_dt_dv_ns_mv"); } @Test @@ -1122,45 +1725,59 @@ public class TestPointFields extends SolrTestCaseJ4 { @Test public void testDatePointFieldNotIndexed() throws Exception { - doTestFieldNotIndexed("number_p_dt_ni", getSequentialStringArrayWithDates(10)); - doTestFieldNotIndexed("number_p_dt_ni_mv", getSequentialStringArrayWithDates(10)); + String[] dates = toStringArray(getRandomInstants(10, false)); + doTestFieldNotIndexed("number_p_dt_ni", dates); + doTestFieldNotIndexed("number_p_dt_ni_mv", dates); } @Test public void testIndexOrDocValuesQuery() throws Exception { - String[] 
fieldTypeNames = new String[]{"_p_i", "_p_l", "_p_d", "_p_f"}; - FieldType[] fieldTypes = new FieldType[]{new IntPointField(), new LongPointField(), new DoublePointField(), new FloatPointField()}; - assert fieldTypeNames.length == fieldTypes.length; + String[] fieldTypeNames = new String[] { "_p_i", "_p_l", "_p_d", "_p_f", "_p_dt" }; + FieldType[] fieldTypes = new FieldType[] + { new IntPointField(), new LongPointField(), new DoublePointField(), new FloatPointField(), new DatePointField() }; + String[] ints = toStringArray(getRandomInts(2, false).stream().sorted().collect(Collectors.toList())); + String[] longs = toStringArray(getRandomLongs(2, false).stream().sorted().collect(Collectors.toList())); + String[] doubles = toStringArray(getRandomDoubles(2, false).stream().sorted().collect(Collectors.toList())); + String[] floats = toStringArray(getRandomFloats(2, false).stream().sorted().collect(Collectors.toList())); + String[] dates = toStringArray(getRandomInstants(2, false).stream().sorted().collect(Collectors.toList())); + String[] min = new String[] { ints[0], longs[0], doubles[0], floats[0], dates[0] }; + String[] max = new String[] { ints[1], longs[1], doubles[1], floats[1], dates[1] }; + assert fieldTypeNames.length == fieldTypes.length + && fieldTypeNames.length == max.length + && fieldTypeNames.length == min.length; for (int i = 0; i < fieldTypeNames.length; i++) { SchemaField fieldIndexed = h.getCore().getLatestSchema().getField("foo_" + fieldTypeNames[i]); SchemaField fieldIndexedAndDv = h.getCore().getLatestSchema().getField("foo_" + fieldTypeNames[i] + "_dv"); SchemaField fieldIndexedMv = h.getCore().getLatestSchema().getField("foo_" + fieldTypeNames[i] + "_mv"); SchemaField fieldIndexedAndDvMv = h.getCore().getLatestSchema().getField("foo_" + fieldTypeNames[i] + "_mv_dv"); - assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexed, "0", "10", true, true) instanceof PointRangeQuery); - assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexedAndDv, "0", "10", true, true) instanceof IndexOrDocValuesQuery); - assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexedMv, "0", "10", true, true) instanceof PointRangeQuery); - assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexedAndDvMv, "0", "10", true, true) instanceof IndexOrDocValuesQuery); - assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexed, "0") instanceof PointRangeQuery); - assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexedAndDv, "0") instanceof IndexOrDocValuesQuery); - assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexedMv, "0") instanceof PointRangeQuery); - assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexedAndDvMv, "0") instanceof IndexOrDocValuesQuery); + assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexed, min[i], max[i], true, true) instanceof PointRangeQuery); + assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexedAndDv, min[i], max[i], true, true) instanceof IndexOrDocValuesQuery); + assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexedMv, min[i], max[i], true, true) instanceof PointRangeQuery); + assertTrue(fieldTypes[i].getRangeQuery(null, fieldIndexedAndDvMv, min[i], max[i], true, true) instanceof IndexOrDocValuesQuery); + assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexed, min[i]) instanceof PointRangeQuery); + assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexedAndDv, min[i]) instanceof IndexOrDocValuesQuery); + assertTrue(fieldTypes[i].getFieldQuery(null, fieldIndexedMv, min[i]) instanceof PointRangeQuery); + assertTrue(fieldTypes[i].getFieldQuery(null, 
fieldIndexedAndDvMv, min[i]) instanceof IndexOrDocValuesQuery); } } public void testInternals() throws IOException { - String[] types = new String[]{"i", "l", "f", "d"}; + String[] types = new String[]{"i", "l", "f", "d", "dt"}; + String[][] values = new String[][] { + toStringArray(getRandomInts(10, false)), + toStringArray(getRandomLongs(10, false)), + toStringArray(getRandomFloats(10, false)), + toStringArray(getRandomDoubles(10, false)), + toStringArray(getRandomInstants(10, false)) + }; + assertEquals(types.length, values.length); Set typesTested = new HashSet<>(); - for (String type:types) { + for (int i = 0 ; i < types.length ; ++i) { for (String suffix:FIELD_SUFFIXES) { - doTestInternals("number_p_" + type + suffix, getSequentialStringArrayWithInts(10)); - typesTested.add("*_p_" + type + suffix); + doTestInternals("number_p_" + types[i] + suffix, values[i]); + typesTested.add("*_p_" + types[i] + suffix); } } - for (String suffix:FIELD_SUFFIXES) { - doTestInternals("number_p_dt" + suffix, getSequentialStringArrayWithDates(10)); - typesTested.add("*_p_dt" + suffix); - } - assertEquals("Missing types in the test", dynFieldRegexesForType(PointField.class), typesTested); } @@ -1197,24 +1814,43 @@ public class TestPointFields extends SolrTestCaseJ4 { } private List getRandomDoubles(int length, boolean missingVals) { - return getRandomList(length, missingVals, - () -> random().nextDouble() * Double.MAX_VALUE * (random().nextBoolean() ? 1.D : -1.D)); + return getRandomList(length, missingVals, () -> { + Double d = Double.NaN; + while (d.isNaN()) { + d = Double.longBitsToDouble(random().nextLong()); + } + return d; + }); } private List getRandomFloats(int length, boolean missingVals) { - return getRandomList(length, missingVals, - () -> random().nextFloat() * Float.MAX_VALUE * (random().nextBoolean() ? 1.f : -1.f)); + return getRandomList(length, missingVals, () -> { + Float f = Float.NaN; + while (f.isNaN()) { + f = Float.intBitsToFloat(random().nextInt()); + } + return f; + }); + } + + private List getRandomInts(int length, boolean missingVals, int bound) { + return getRandomList(length, missingVals, () -> random().nextInt() % bound); } private List getRandomInts(int length, boolean missingVals) { return getRandomList(length, missingVals, () -> random().nextInt()); } - private List getRandomLongs(int length, boolean missingVals){ + private List getRandomLongs(int length, boolean missingVals, long bound) { + assert bound > 0L; + return getRandomList(length, missingVals, () -> random().nextLong() % bound); // see Random.nextInt(int bound) + } + + private List getRandomLongs(int length, boolean missingVals) { return getRandomList(length, missingVals, () -> random().nextLong()); } - private List getRandomInstants(int length, boolean missingVals){ + private List getRandomInstants(int length, boolean missingVals) { return getRandomList(length, missingVals, () -> Instant.ofEpochMilli(random().nextLong())); } @@ -1272,47 +1908,46 @@ public class TestPointFields extends SolrTestCaseJ4 { doTestIntPointFieldExactQuery(field, testLong, true); } + private String getTestString(boolean searchable, int numFound) { + return "//*[@numFound='" + (searchable ? 
Integer.toString(numFound) : "0") + "']"; + } + /** * @param field the field to use for indexing and searching against * @param testLong set to true if "field" is expected to support long values, false if only integers * @param searchable set to true if searches against "field" should succeed, false if field is only stored and searches should always get numFound=0 */ private void doTestIntPointFieldExactQuery(final String field, final boolean testLong, final boolean searchable) throws Exception { - final String MATCH_ONE = "//*[@numFound='" + (searchable ? "1" : "0") + "']"; - final String MATCH_TWO = "//*[@numFound='" + (searchable ? "2" : "0") + "']"; - - for (int i=0; i < 10; i++) { - assertU(adoc("id", String.valueOf(i), field, String.valueOf(i+1))); + int numValues = 10 * RANDOM_MULTIPLIER; + Map randCount = new HashMap<>(numValues); + String[] rand = testLong ? toStringArray(getRandomLongs(numValues, false)) + : toStringArray(getRandomInts(numValues, false)); + for (int i = 0 ; i < numValues ; i++) { + randCount.merge(rand[i], 1, (a, b) -> a + b); // count unique values + assertU(adoc("id", String.valueOf(i), field, rand[i])); } assertU(commit()); - for (int i = 0; i < 10; i++) { - assertQ(req("q", field + ":"+(i+1), "fl", "id, " + field), - MATCH_ONE); + + for (int i = 0 ; i < numValues ; i++) { + assertQ(req("q", field + ":" + (rand[i].startsWith("-") ? "\\" : "") + rand[i], + "fl", "id," + field), getTestString(searchable, randCount.get(rand[i]))); } - for (int i = 0; i < 10; i++) { - assertQ(req("debug", "true", "q", field + ":" + (i+1) + " OR " + field + ":" + ((i+1)%10 + 1)), MATCH_TWO); + StringBuilder builder = new StringBuilder(); + for (String value : randCount.keySet()) { + if (builder.length() != 0) { + builder.append(" OR "); + } + if (value.startsWith("-")) { + builder.append("\\"); // escape negative sign + } + builder.append(value); } + assertQ(req("debug", "true", "q", field + ":(" + builder.toString() + ")"), getTestString(searchable, numValues)); assertU(adoc("id", String.valueOf(Integer.MAX_VALUE), field, String.valueOf(Integer.MAX_VALUE))); assertU(commit()); - assertQ(req("q", field + ":"+Integer.MAX_VALUE, "fl", "id, " + field), - MATCH_ONE); - - if (testLong) { - for (long i = (long)Integer.MAX_VALUE; i < (long)Integer.MAX_VALUE + 10; i++) { - assertU(adoc("id", String.valueOf(i), field, String.valueOf(i+1))); - } - assertU(commit()); - for (long i = (long)Integer.MAX_VALUE; i < (long)Integer.MAX_VALUE + 10; i++) { - assertQ(req("q", field + ":"+(i+1), "fl", "id, " + field), - MATCH_ONE); - } - assertU(adoc("id", String.valueOf(Long.MAX_VALUE), field, String.valueOf(Long.MAX_VALUE))); - assertU(commit()); - assertQ(req("q", field + ":"+Long.MAX_VALUE, "fl", "id, " + field), - MATCH_ONE); - } + assertQ(req("q", field + ":"+Integer.MAX_VALUE, "fl", "id, " + field), getTestString(searchable, 1)); clearIndex(); assertU(commit()); @@ -1489,54 +2124,6 @@ public class TestPointFields extends SolrTestCaseJ4 { SolrException.ErrorCode.BAD_REQUEST); } - private void doTestIntPointFieldRangeFacet(String docValuesField, String nonDocValuesField) throws Exception { - for (int i = 0; i < 10; i++) { - assertU(adoc("id", String.valueOf(i), docValuesField, String.valueOf(i), nonDocValuesField, String.valueOf(i))); - } - assertU(commit()); - assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); - assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); - assertQ(req("q", "*:*", "facet", "true", 
"facet.range", docValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - - assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2", "facet.range.method", "dv"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - - assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); - assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); - // Range Faceting with method = filter should work - assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2", "facet.range.method", "filter"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - - // this should actually use filter method instead of dv - assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2", "facet.range.method", "dv"), - 
"//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - } - private void doTestIntPointFunctionQuery(String field, String type) throws Exception { for (int i = 9; i >= 0; i--) { assertU(adoc("id", String.valueOf(i), field, String.valueOf(i))); @@ -1600,7 +2187,7 @@ public class TestPointFields extends SolrTestCaseJ4 { } - private void testPointStats(String field, String dvField, String[] numbers, double min, double max, String count, String missing, double delta) { + private void testPointStats(String field, String dvField, String[] numbers, double min, double max, int count, int missing, double delta) { String minMin = String.valueOf(min - Math.abs(delta*min)); String maxMin = String.valueOf(min + Math.abs(delta*min)); String minMax = String.valueOf(max - Math.abs(delta*max)); @@ -1613,11 +2200,11 @@ public class TestPointFields extends SolrTestCaseJ4 { assertTrue(h.getCore().getLatestSchema().getField(dvField).hasDocValues()); assertTrue(h.getCore().getLatestSchema().getField(dvField).getType() instanceof PointField); assertQ(req("q", "*:*", "fl", "id, " + dvField, "stats", "true", "stats.field", dvField), - "//*[@numFound='11']", - "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='min'][.>='" + minMin + "']", - "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='min'][.<='" + maxMin+ "']", - "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='max'][.>='" + minMax + "']", - "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='max'][.<='" + maxMax + "']", + "//*[@numFound='" + (numbers.length + 1) + "']", + "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='min'][.>=" + minMin + "]", + "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='min'][.<=" + maxMin+ "]", + "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='max'][.>=" + minMax + "]", + "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/double[@name='max'][.<=" + maxMax + "]", "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/long[@name='count'][.='" + count + "']", "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/long[@name='missing'][.='" + missing + "']"); @@ -1652,8 +2239,9 @@ public class TestPointFields extends SolrTestCaseJ4 { assertU(adoc("id", String.valueOf(i), fieldName, numbers[i], fieldName, numbers[i+10])); } assertU(commit()); + FieldType type = h.getCore().getLatestSchema().getField(fieldName).getType(); for (int i = 0; i < 20; i++) { - if 
(h.getCore().getLatestSchema().getField(fieldName).getType() instanceof DatePointField) { + if (type instanceof DatePointField) { assertQ(req("q", fieldName + ":\"" + numbers[i] + "\""), MATCH_ONE); } else { @@ -1663,7 +2251,7 @@ public class TestPointFields extends SolrTestCaseJ4 { } for (int i = 0; i < 20; i++) { - if (h.getCore().getLatestSchema().getField(fieldName).getType() instanceof DatePointField) { + if (type instanceof DatePointField) { assertQ(req("q", fieldName + ":\"" + numbers[i] + "\"" + " OR " + fieldName + ":\"" + numbers[(i+1)%10]+"\""), MATCH_TWO); } else { @@ -1684,8 +2272,8 @@ public class TestPointFields extends SolrTestCaseJ4 { if (Boolean.getBoolean("enable.update.log")) { for (int i = 0; i < 10; i++) { assertQ(req("qt", "/get", "id", String.valueOf(i)), - "//doc/arr[@name='" + fieldName + "']/" + type + "[1][.='" + numbers[i] + "']", - "//doc/arr[@name='" + fieldName + "']/" + type + "[2][.='" + numbers[i+10] + "']", + "//doc/arr[@name='" + fieldName + "']/" + type + "[.='" + numbers[i] + "']", + "//doc/arr[@name='" + fieldName + "']/" + type + "[.='" + numbers[i+10] + "']", "count(//doc/arr[@name='" + fieldName + "']/" + type + ")=2"); } } @@ -1694,8 +2282,8 @@ public class TestPointFields extends SolrTestCaseJ4 { if (Boolean.getBoolean("enable.update.log")) { for (int i = 0; i < 10; i++) { assertQ(req("qt", "/get", "id", String.valueOf(i)), - "//doc/arr[@name='" + fieldName + "']/" + type + "[1][.='" + numbers[i] + "']", - "//doc/arr[@name='" + fieldName + "']/" + type + "[2][.='" + numbers[i+10] + "']", + "//doc/arr[@name='" + fieldName + "']/" + type + "[.='" + numbers[i] + "']", + "//doc/arr[@name='" + fieldName + "']/" + type + "[.='" + numbers[i+10] + "']", "count(//doc/arr[@name='" + fieldName + "']/" + type + ")=2"); } } @@ -1703,8 +2291,8 @@ public class TestPointFields extends SolrTestCaseJ4 { expected[0] = "//*[@numFound='10']"; for (int i = 1; i <= 10; i++) { // checks for each doc's two values aren't next to eachother in array, but that doesn't matter for correctness - expected[i] = "//result/doc[" + i + "]/arr[@name='" + fieldName + "']/" + type + "[1][.='" + numbers[i-1] + "']"; - expected[i+10] = "//result/doc[" + i + "]/arr[@name='" + fieldName + "']/" + type + "[2][.='" + numbers[i + 9] + "']"; + expected[i] = "//result/doc[" + i + "]/arr[@name='" + fieldName + "']/" + type + "[.='" + numbers[i-1] + "']"; + expected[i+10] = "//result/doc[" + i + "]/arr[@name='" + fieldName + "']/" + type + "[.='" + numbers[i + 9] + "']"; } assertQ(req("q", "*:*", "fl", "id, " + fieldName, "sort","id asc"), expected); } @@ -2059,42 +2647,42 @@ public class TestPointFields extends SolrTestCaseJ4 { } - private void doTestFloatPointFieldExactQuery(final String field) throws Exception { - doTestFloatPointFieldExactQuery(field, true); + private void doTestFloatPointFieldExactQuery(final String field, boolean testDouble) throws Exception { + doTestFloatPointFieldExactQuery(field, true, testDouble); } /** * @param field the field to use for indexing and searching against * @param searchable set to true if searches against "field" should succeed, false if field is only stored and searches should always get numFound=0 */ - private void doTestFloatPointFieldExactQuery(String field, final boolean searchable) throws Exception { - final String MATCH_ONE = "//*[@numFound='" + (searchable ? "1" : "0") + "']"; - final String MATCH_TWO = "//*[@numFound='" + (searchable ? 
"2" : "0") + "']"; - - for (int i=0; i < 10; i++) { - assertU(adoc("id", String.valueOf(i), field, String.valueOf(i + "." + i))); + private void doTestFloatPointFieldExactQuery(final String field, final boolean searchable, final boolean testDouble) + throws Exception { + int numValues = 10 * RANDOM_MULTIPLIER; + Map randCount = new HashMap<>(numValues); + String[] rand = testDouble ? toStringArray(getRandomDoubles(numValues, false)) + : toStringArray(getRandomFloats(numValues, false)); + for (int i = 0 ; i < numValues ; i++) { + randCount.merge(rand[i], 1, (a, b) -> a + b); // count unique values + assertU(adoc("id", String.valueOf(i), field, rand[i])); } assertU(commit()); - for (int i = 0; i < 9; i++) { - assertQ(req("q", field + ":"+(i+1) + "." + (i+1), "fl", "id, " + field), - MATCH_ONE); + + for (int i = 0 ; i < numValues ; i++) { + assertQ(req("q", field + ":" + (rand[i].startsWith("-") ? "\\" : "") + rand[i], + "fl", "id," + field), getTestString(searchable, randCount.get(rand[i]))); } - - for (int i = 0; i < 9; i++) { - String num1 = (i+1) + "." + (i+1); - String num2 = ((i+1)%9 + 1) + "." + ((i+1)%9 + 1); - assertQ(req("q", field + ":" + num1 + " OR " + field + ":" + num2), - MATCH_TWO); - } - - clearIndex(); - assertU(commit()); - for (int i = 0; i < atLeast(10); i++) { - float rand = random().nextFloat() * 10; - assertU(adoc("id", "random_number ", field, String.valueOf(rand))); //always the same id to override - assertU(commit()); - assertQ(req("q", field + ":" + rand, "fl", "id, " + field), - MATCH_ONE); + + StringBuilder builder = new StringBuilder(); + for (String value : randCount.keySet()) { + if (builder.length() != 0) { + builder.append(" OR "); + } + if (value.startsWith("-")) { + builder.append("\\"); // escape negative sign + } + builder.append(value); } + assertQ(req("debug", "true", "q", field + ":(" + builder.toString() + ")"), getTestString(searchable, numValues)); + clearIndex(); assertU(commit()); } @@ -2263,55 +2851,6 @@ public class TestPointFields extends SolrTestCaseJ4 { } } - private void doTestFloatPointFieldRangeFacet(String docValuesField, String nonDocValuesField) throws Exception { - - for (int i = 0; i < 10; i++) { - assertU(adoc("id", String.valueOf(i), docValuesField, String.format(Locale.ROOT, "%f", (float)i*1.1), nonDocValuesField, String.format(Locale.ROOT, "%f", (float)i*1.1))); - } - assertU(commit()); - assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); - assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); - assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='2.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='4.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='6.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='8.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + 
"']/lst[@name='counts']/int[@name='-10.0'][.='0']"); - - assertQ(req("q", "*:*", "facet", "true", "facet.range", docValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2", "facet.range.method", "dv"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='2.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='4.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='6.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='8.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='-10.0'][.='0']"); - - assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); - assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); - // Range Faceting with method = filter should work - assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2", "facet.range.method", "filter"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='0.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='2.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='4.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='6.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='8.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10.0'][.='0']"); - - // this should actually use filter method instead of dv - assertQ(req("q", "*:*", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", "-10", "facet.range.end", "10", "facet.range.gap", "2", "facet.range.method", "dv"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='0.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='2.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='4.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='6.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='8.0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10.0'][.='0']"); - } - private void 
doTestFloatPointFunctionQuery(String field, String type) throws Exception { for (int i = 9; i >= 0; i--) { assertU(adoc("id", String.valueOf(i), field, String.format(Locale.ROOT, "%f", (float)i*1.1))); @@ -2478,80 +3017,6 @@ public class TestPointFields extends SolrTestCaseJ4 { "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10.0'][.='0']"); } - private void doTestIntPointFieldMultiValuedRangeFacet(String docValuesField, String nonDocValuesField) throws Exception { - for (int i = 0; i < 10; i++) { - assertU(adoc("id", String.valueOf(i), docValuesField, String.valueOf(i), docValuesField, String.valueOf(i + 10), - nonDocValuesField, String.valueOf(i), nonDocValuesField, String.valueOf(i + 10))); - } - assertU(commit()); - assertTrue(h.getCore().getLatestSchema().getField(docValuesField).hasDocValues()); - assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField); - assertQ(req("q", "*:*", "fl", "id", "facet", "true", "facet.range", docValuesField, "facet.range.start", "-10", "facet.range.end", "20", "facet.range.gap", "2"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='10'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='12'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='14'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='16'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='18'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - - assertQ(req("q", "*:*", "fl", "id", "facet", "true", "facet.range", docValuesField, "facet.range.start", "-10", "facet.range.end", "20", "facet.range.gap", "2", "facet.range.method", "dv"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + 
"']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='10'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='12'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='14'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='16'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='18'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - - assertQ(req("q", "*:*", "fl", "id", "facet", "true", "facet.range", docValuesField, "facet.range.start", "0", "facet.range.end", "20", "facet.range.gap", "100"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + docValuesField + "']/lst[@name='counts']/int[@name='0'][.='10']"); - - assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues()); - assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField); - // Range Faceting with method = filter should work - assertQ(req("q", "*:*", "fl", "id", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", "-10", "facet.range.end", "20", "facet.range.gap", "2", "facet.range.method", "filter"), - "//*[@numFound='10']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='10'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='12'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='14'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='16'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='18'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - - // this should actually use filter method instead of dv - assertQ(req("q", "*:*", "fl", "id", "facet", "true", "facet.range", nonDocValuesField, "facet.range.start", "-10", "facet.range.end", "20", "facet.range.gap", "2", "facet.range.method", "dv"), - "//*[@numFound='10']", - 
"//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='0'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='2'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='4'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='6'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='8'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='10'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='12'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='14'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='16'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='18'][.='2']", - "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='" + nonDocValuesField + "']/lst[@name='counts']/int[@name='-10'][.='0']"); - } - - private void doTestDatePointFieldExactQuery(final String field, final String baseDate) throws Exception { doTestDatePointFieldExactQuery(field, baseDate, true); } @@ -2840,7 +3305,7 @@ public class TestPointFields extends SolrTestCaseJ4 { assertTrue(h.getCore().getLatestSchema().getField(dvField).hasDocValues()); assertTrue(h.getCore().getLatestSchema().getField(dvField).getType() instanceof PointField); assertQ(req("q", "*:*", "fl", "id, " + dvField, "stats", "true", "stats.field", dvField), - "//*[@numFound='11']", + "//*[@numFound='" + (dates.length + 1) + "']", "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/date[@name='min'][.='" + dates[0] + "']", "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/date[@name='max'][.='" + dates[dates.length-1] + "']", "//lst[@name='stats']/lst[@name='stats_fields']/lst[@name='" + dvField+ "']/long[@name='count'][.='" + dates.length + "']", @@ -2854,61 +3319,73 @@ public class TestPointFields extends SolrTestCaseJ4 { SolrException.ErrorCode.BAD_REQUEST); } - private void testDatePointFieldsAtomicUpdates(String field, String type) throws Exception { - String date = "1995-01-10T10:59:10Z"; - assertU(adoc(sdoc("id", "1", field, date))); - assertU(commit()); - - assertQ(req("q", "id:1"), - "//result/doc[1]/" + type + "[@name='" + field + "'][.='"+date+"']"); - - assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("set", date+"+2DAYS")))); - assertU(commit()); - - assertQ(req("q", "id:1"), - "//result/doc[1]/" + type + "[@name='" + field + "'][.='1995-01-12T10:59:10Z']"); - } - - private void testMultiValuedDatePointFieldsAtomicUpdates(String field, String type) throws Exception { - String date1 = "1995-01-10T10:59:10Z"; - String date2 = "1995-01-11T10:59:10Z"; - String date3 = "1995-01-12T10:59:10Z"; + private void testDatePointFieldsAtomicUpdates(String field) throws Exception { + long millis1 = random().nextLong() % MAX_DATE_EPOCH_MILLIS; + BigInteger bigMillis1 = 
BigInteger.valueOf(millis1); + long millis2; + BigInteger maxLong = BigInteger.valueOf(Long.MAX_VALUE); + DateGapCeiling gap; + for ( ; ; ) { + millis2 = random().nextLong() % MAX_DATE_EPOCH_MILLIS; + gap = new DateGapCeiling(millis2 - millis1); + millis2 = gap.addTo(millis1); // adjust millis2 to the closest +/-UNIT gap + break; + } + String date1 = Instant.ofEpochMilli(millis1).toString(); + String date2 = Instant.ofEpochMilli(millis2).toString(); assertU(adoc(sdoc("id", "1", field, date1))); assertU(commit()); assertQ(req("q", "id:1"), - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date1+"']", - "count(//result/doc[1]/arr[@name='" + field + "']/" + type + ")=1"); + "//result/doc[1]/date[@name='" + field + "'][.='" + date1 + "']"); - assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("add", date2)))); + assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("set", date1 + gap.toString())))); assertU(commit()); assertQ(req("q", "id:1"), - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date1+"']", - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date2+"']", - "count(//result/doc[1]/arr[@name='" + field + "']/" + type + ")=2"); + "//result/doc[1]/date[@name='" + field + "'][.='" + date2 + "']"); + } - assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("remove", date1)))); + private void testMultiValuedDatePointFieldsAtomicUpdates(String field) throws Exception { + List datesList = getRandomLongs(3, false, MAX_DATE_EPOCH_MILLIS) + .stream().map(millis -> Instant.ofEpochMilli(millis).toString()).collect(Collectors.toList()); + String[] dates = datesList.toArray(new String[datesList.size()]); + assertU(adoc(sdoc("id", "1", field, dates[0]))); assertU(commit()); assertQ(req("q", "id:1"), - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date2+"']", - "count(//result/doc[1]/arr[@name='" + field + "']/" + type + ")=1"); + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[0] + "']", + "count(//result/doc[1]/arr[@name='" + field + "']/date)=1"); - assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("set", ImmutableList.of(date1, date2, date3))))); + assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("add", dates[1])))); assertU(commit()); assertQ(req("q", "id:1"), - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date1+"']", - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date2+"']", - "//result/doc[1]/arr[@name='" + field + "']/" + type + "[.='"+date3+"']", - "count(//result/doc[1]/arr[@name='" + field + "']/" + type + ")=3"); + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[0] + "']", + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[1] + "']", + "count(//result/doc[1]/arr[@name='" + field + "']/date)=2"); + + assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("remove", dates[0])))); + assertU(commit()); + + assertQ(req("q", "id:1"), + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[1] + "']", + "count(//result/doc[1]/arr[@name='" + field + "']/date)=1"); + + assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("set", datesList)))); + assertU(commit()); + + assertQ(req("q", "id:1"), + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[0] + "']", + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[1] + "']", + "//result/doc[1]/arr[@name='" + field + "']/date[.='" + dates[2] + "']", + "count(//result/doc[1]/arr[@name='" + field + "']/date)=3"); assertU(adoc(sdoc("id", "1", field, ImmutableMap.of("removeregex", ".*")))); 
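The rewritten atomic-update helpers above drive Solr's add / remove / set / removeregex operations entirely through `assertU(adoc(sdoc(...)))` plus XPath assertions. As a minimal client-side sketch of the same operations (not part of the patch), the following SolrJ snippet shows how those update maps are built; the base URL, the collection path, and the `number_p_dt_mv` dynamic field are illustrative assumptions borrowed from the test schema, so adjust them to your installation.

[source,java]
----
import java.util.Arrays;
import java.util.Collections;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrInputDocument;

public class AtomicDateUpdateSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder base URL and collection; adjust to your setup.
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {

      // "add" appends one value to a multi-valued field.
      SolrInputDocument add = new SolrInputDocument();
      add.addField("id", "1");
      add.addField("number_p_dt_mv", Collections.singletonMap("add", "1995-01-11T10:59:10Z"));
      client.add(add);

      // "set" replaces all existing values with the supplied list.
      SolrInputDocument set = new SolrInputDocument();
      set.addField("id", "1");
      set.addField("number_p_dt_mv", Collections.singletonMap("set",
          Arrays.asList("1995-01-10T10:59:10Z", "1995-01-11T10:59:10Z", "1995-01-12T10:59:10Z")));
      client.add(set);

      // "removeregex" clears every value matching the pattern, as in the last assertion above.
      SolrInputDocument clear = new SolrInputDocument();
      clear.addField("id", "1");
      clear.addField("number_p_dt_mv", Collections.singletonMap("removeregex", ".*"));
      client.add(clear);

      client.commit();
    }
  }
}
----

The tests verify each of these steps with `/get` and query XPath checks; the sketch only illustrates the shape of the update documents involved.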
assertU(commit()); assertQ(req("q", "id:1"), - "count(//result/doc[1]/arr[@name='" + field + "']/" + type + ")=0"); + "count(//result/doc[1]/arr[@name='" + field + "']/date)=0"); } diff --git a/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java b/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java index 27b55d020db..81ead132c44 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java +++ b/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java @@ -159,6 +159,19 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { "{'id':'xyz'}" + "]"); } + + @Test + public void testDuplicateMultiValued() throws Exception { + doTest("strTF", dvStringFieldName(3,true,false), "str", "X", "X", "Y"); + doTest("strTT", dvStringFieldName(3,true,true), "str", "X", "X", "Y"); + doTest("strFF", dvStringFieldName(3,false,false), "str", "X", "X", "Y"); + doTest("int", "test_is_dvo", "int", "42", "42", "-666"); + doTest("float", "test_fs_dvo", "float", "4.2", "4.2", "-66.666"); + doTest("long", "test_ls_dvo", "long", "420", "420", "-6666666" ); + doTest("double", "test_ds_dvo", "double", "0.0042", "0.0042", "-6.6666E-5"); + doTest("date", "test_dts_dvo", "date", "2016-07-04T03:02:01Z", "2016-07-04T03:02:01Z", "1999-12-31T23:59:59Z" ); + doTest("enum", "enums_dvo", "str", SEVERITY[0], SEVERITY[0], SEVERITY[1]); + } @Test public void testRandomSingleAndMultiValued() throws Exception { @@ -318,9 +331,14 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { xpaths[i] = "//arr[@name='" + field + "']/" + type + "[.='" + value[i] + "']"; } - // Docvalues are sets, but stored values are ordered multisets, so cardinality depends on the value source - xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = " - + (isStoredField(field) ? value.length : valueSet.size()) + "]"; + // See SOLR-10924... + // Trie/String based Docvalues are sets, but stored values & Point DVs are ordered multisets, + // so cardinality depends on the value source + final int expectedCardinality = + (isStoredField(field) || (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP) + && ! (field.startsWith("enum") || field.startsWith("test_s")))) + ? value.length : valueSet.size(); + xpaths[value.length] = "*[count(//arr[@name='"+field+"']/"+type+")="+expectedCardinality+"]"; assertU(adoc(fieldAndValues)); } else { diff --git a/solr/server/solr/configsets/_default/conf/elevate.xml b/solr/server/solr/configsets/_default/conf/elevate.xml deleted file mode 100644 index 2c09ebed669..00000000000 --- a/solr/server/solr/configsets/_default/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/solr/server/solr/configsets/_default/conf/solrconfig.xml b/solr/server/solr/configsets/_default/conf/solrconfig.xml index f53636f474e..aa1ae698bd8 100644 --- a/solr/server/solr/configsets/_default/conf/solrconfig.xml +++ b/solr/server/solr/configsets/_default/conf/solrconfig.xml @@ -1004,7 +1004,6 @@ string - elevate.xml diff --git a/solr/solr-ref-guide/src/about-this-guide.adoc b/solr/solr-ref-guide/src/about-this-guide.adoc index 2168c1c76c1..3a44ab001a1 100644 --- a/solr/solr-ref-guide/src/about-this-guide.adoc +++ b/solr/solr-ref-guide/src/about-this-guide.adoc @@ -1,6 +1,7 @@ = About This Guide :page-shortname: about-this-guide :page-permalink: about-this-guide.html +:page-toc: false // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information @@ -26,38 +27,13 @@ Designed to provide high-level documentation, this guide is intended to be more The material as presented assumes that you are familiar with some basic search concepts and that you can read XML. It does not assume that you are a Java programmer, although knowledge of Java is helpful when working directly with Lucene or when developing custom extensions to a Lucene/Solr installation. -[[AboutThisGuide-SpecialInlineNotes]] -== Special Inline Notes - -Special notes are included throughout these pages. There are several types of notes: - -Information blocks:: -+ -NOTE: These provide additional information that's useful for you to know. - -Important:: -+ -IMPORTANT: These provide information that is critical for you to know. - -Tip:: -+ -TIP: These provide helpful tips. - -Caution:: -+ -CAUTION: These provide details on scenarios or configurations you should be careful with. - -Warning:: -+ -WARNING: These are meant to warn you from a possibly dangerous change or action. - - -[[AboutThisGuide-HostsandPortExamples]] == Hosts and Port Examples -The default port when running Solr is 8983. The samples, URLs and screenshots in this guide may show different ports, because the port number that Solr uses is configurable. If you have not customized your installation of Solr, please make sure that you use port 8983 when following the examples, or configure your own installation to use the port numbers shown in the examples. For information about configuring port numbers, see the section <>. +The default port when running Solr is 8983. The samples, URLs and screenshots in this guide may show different ports, because the port number that Solr uses is configurable. -Similarly, URL examples use 'localhost' throughout; if you are accessing Solr from a location remote to the server hosting Solr, replace 'localhost' with the proper domain or IP where Solr is running. +If you have not customized your installation of Solr, please make sure that you use port 8983 when following the examples, or configure your own installation to use the port numbers shown in the examples. For information about configuring port numbers, see the section <>. + +Similarly, URL examples use `localhost` throughout; if you are accessing Solr from a location remote to the server hosting Solr, replace `localhost` with the proper domain or IP where Solr is running. For example, we might provide a sample query like: @@ -67,7 +43,32 @@ There are several items in this URL you might need to change locally. First, if `\http://www.example.com/solr/mycollection/select?q=brown+cow` -[[AboutThisGuide-Paths]] == Paths -Path information is given relative to `solr.home`, which is the location under the main Solr installation where Solr's collections and their `conf` and `data` directories are stored. When running the various examples mentioned through out this tutorial (i.e., `bin/solr -e techproducts`) the `solr.home` will be a sub-directory of `example/` created for you automatically. +Path information is given relative to `solr.home`, which is the location under the main Solr installation where Solr's collections and their `conf` and `data` directories are stored. + +When running the various examples mentioned through out this tutorial (i.e., `bin/solr -e techproducts`) the `solr.home` will be a sub-directory of `example/` created for you automatically. + +== Special Inline Notes + +Special notes are included throughout these pages. 
There are several types of notes: + +=== Information blocks + +NOTE: These provide additional information that's useful for you to know. + +=== Important + +IMPORTANT: These provide information that is critical for you to know. + +=== Tip + +TIP: These provide helpful tips. + +=== Caution + +CAUTION: These provide details on scenarios or configurations you should be careful with. + +=== Warning + +WARNING: These are meant to warn you from a possibly dangerous change or action. diff --git a/solr/solr-ref-guide/src/about-tokenizers.adoc b/solr/solr-ref-guide/src/about-tokenizers.adoc index 5bee36cd77c..06227b49b54 100644 --- a/solr/solr-ref-guide/src/about-tokenizers.adoc +++ b/solr/solr-ref-guide/src/about-tokenizers.adoc @@ -37,7 +37,6 @@ A `TypeTokenFilterFactory` is available that creates a `TypeTokenFilter` that fi For a complete list of the available TokenFilters, see the section <>. -[[AboutTokenizers-WhenTouseaCharFiltervs.aTokenFilter]] == When To use a CharFilter vs. a TokenFilter There are several pairs of CharFilters and TokenFilters that have related (ie: `MappingCharFilter` and `ASCIIFoldingFilter`) or nearly identical (ie: `PatternReplaceCharFilterFactory` and `PatternReplaceFilterFactory`) functionality and it may not always be obvious which is the best choice. diff --git a/solr/solr-ref-guide/src/adding-custom-plugins-in-solrcloud-mode.adoc b/solr/solr-ref-guide/src/adding-custom-plugins-in-solrcloud-mode.adoc index f9277f0f4b1..6e9864eaa47 100644 --- a/solr/solr-ref-guide/src/adding-custom-plugins-in-solrcloud-mode.adoc +++ b/solr/solr-ref-guide/src/adding-custom-plugins-in-solrcloud-mode.adoc @@ -30,12 +30,10 @@ In addition to requiring that Solr by running in <> section below. ==== -[[AddingCustomPluginsinSolrCloudMode-UploadingJarFiles]] == Uploading Jar Files The first step is to use the <> to upload your jar files. This will to put your jars in the `.system` collection and distribute them across your SolrCloud nodes. These jars are added to a separate classloader and only accessible to components that are configured with the property `runtimeLib=true`. These components are loaded lazily because the `.system` collection may not be loaded when a particular core is loaded. -[[AddingCustomPluginsinSolrCloudMode-ConfigAPICommandstouseJarsasRuntimeLibraries]] == Config API Commands to use Jars as Runtime Libraries The runtime library feature uses a special set of commands for the <> to add, update, or remove jar files currently available in the blob store to the list of runtime libraries. @@ -74,14 +72,12 @@ curl http://localhost:8983/solr/techproducts/config -H 'Content-type:application }' ---- -[[AddingCustomPluginsinSolrCloudMode-SecuringRuntimeLibraries]] == Securing Runtime Libraries A drawback of this feature is that it could be used to load malicious executable code into the system. However, it is possible to restrict the system to load only trusted jars using http://en.wikipedia.org/wiki/Public_key_infrastructure[PKI] to verify that the executables loaded into the system are trustworthy. The following steps will allow you enable security for this feature. The instructions assume you have started all your Solr nodes with the `-Denable.runtime.lib=true`. -[[Step1_GenerateanRSAPrivateKey]] === Step 1: Generate an RSA Private Key The first step is to generate an RSA private key. The example below uses a 512-bit key, but you should use the strength appropriate to your needs. @@ -91,7 +87,6 @@ The first step is to generate an RSA private key. 
The example below uses a 512-b $ openssl genrsa -out priv_key.pem 512 ---- -[[Step2_OutputthePublicKey]] === Step 2: Output the Public Key The public portion of the key should be output in DER format so Java can read it. @@ -101,7 +96,6 @@ The public portion of the key should be output in DER format so Java can read it $ openssl rsa -in priv_key.pem -pubout -outform DER -out pub_key.der ---- -[[Step3_LoadtheKeytoZooKeeper]] === Step 3: Load the Key to ZooKeeper The `.der` files that are output from Step 2 should then be loaded to ZooKeeper under a node `/keys/exe` so they are available throughout every node. You can load any number of public keys to that node and all are valid. If a key is removed from the directory, the signatures of that key will cease to be valid. So, before removing the a key, make sure to update your runtime library configurations with valid signatures with the `update-runtimelib` command. @@ -130,7 +124,6 @@ $ .bin/zkCli.sh -server localhost:9983 After this, any attempt to load a jar will fail. All your jars must be signed with one of your private keys for Solr to trust it. The process to sign your jars and use the signature is outlined in Steps 4-6. -[[Step4_SignthejarFile]] === Step 4: Sign the jar File Next you need to sign the sha1 digest of your jar file and get the base64 string. @@ -142,7 +135,6 @@ $ openssl dgst -sha1 -sign priv_key.pem myjar.jar | openssl enc -base64 The output of this step will be a string that you will need to add the jar to your classpath in Step 6 below. -[[Step5_LoadthejartotheBlobStore]] === Step 5: Load the jar to the Blob Store Load your jar to the Blob store, using the <>. This step does not require a signature; you will need the signature in Step 6 to add it to your classpath. @@ -155,7 +147,6 @@ http://localhost:8983/solr/.system/blob/{blobname} The blob name that you give the jar file in this step will be used as the name in the next step. -[[Step6_AddthejartotheClasspath]] === Step 6: Add the jar to the Classpath Finally, add the jar to the classpath using the Config API as detailed above. In this step, you will need to provide the signature of the jar that you got in Step 4. diff --git a/solr/solr-ref-guide/src/analyzers.adoc b/solr/solr-ref-guide/src/analyzers.adoc index c274f8e912c..ae1ae905b10 100644 --- a/solr/solr-ref-guide/src/analyzers.adoc +++ b/solr/solr-ref-guide/src/analyzers.adoc @@ -60,7 +60,6 @@ In this case, no Analyzer class was specified on the `` element. Rathe The output of an Analyzer affects the _terms_ indexed in a given field (and the terms used when parsing queries against those fields) but it has no impact on the _stored_ value for the fields. For example: an analyzer might split "Brown Cow" into two indexed terms "brown" and "cow", but the stored value will still be a single String: "Brown Cow" ==== -[[Analyzers-AnalysisPhases]] == Analysis Phases Analysis takes place in two contexts. At index time, when a field is being created, the token stream that results from analysis is added to an index and defines the set of terms (including positions, sizes, and so on) for the field. At query time, the values being searched for are analyzed and the terms that result are matched against those that are stored in the field's index. @@ -89,7 +88,6 @@ In this theoretical example, at index time the text is tokenized, the tokens are At query time, the only normalization that happens is to convert the query terms to lowercase. The filtering and mapping steps that occur at index time are not applied to the query terms. 
Queries must then, in this example, be very precise, using only the normalized terms that were stored at index time. -[[Analyzers-AnalysisforMulti-TermExpansion]] === Analysis for Multi-Term Expansion In some types of queries (ie: Prefix, Wildcard, Regex, etc...) the input provided by the user is not natural language intended for Analysis. Things like Synonyms or Stop word filtering do not work in a logical way in these types of Queries. diff --git a/solr/solr-ref-guide/src/authentication-and-authorization-plugins.adoc b/solr/solr-ref-guide/src/authentication-and-authorization-plugins.adoc index 7f1586fc7d6..fce8acb0e8c 100644 --- a/solr/solr-ref-guide/src/authentication-and-authorization-plugins.adoc +++ b/solr/solr-ref-guide/src/authentication-and-authorization-plugins.adoc @@ -27,7 +27,6 @@ All authentication and authorization plugins can work with Solr whether they are The following section describes how to enable plugins with `security.json` and place them in the proper locations for your mode of operation. -[[AuthenticationandAuthorizationPlugins-EnablePluginswithsecurity.json]] == Enable Plugins with security.json All of the information required to initialize either type of security plugin is stored in a `security.json` file. This file contains 2 sections, one each for authentication and authorization. @@ -45,7 +44,7 @@ All of the information required to initialize either type of security plugin is } ---- -The `/security.json` file needs to be in the proper location before a Solr instance comes up so Solr starts with the security plugin enabled. See the section <> below for information on how to do this. +The `/security.json` file needs to be in the proper location before a Solr instance comes up so Solr starts with the security plugin enabled. See the section <> below for information on how to do this. Depending on the plugin(s) in use, other information will be stored in `security.json` such as user information or rules to create roles and permissions. This information is added through the APIs for each plugin provided by Solr, or, in the case of a custom plugin, the approach designed by you. @@ -66,10 +65,8 @@ Here is a more detailed `security.json` example. In this, the Basic authenticati }} ---- -[[AuthenticationandAuthorizationPlugins-Usingsecurity.jsonwithSolr]] == Using security.json with Solr -[[AuthenticationandAuthorizationPlugins-InSolrCloudmode]] === In SolrCloud Mode While configuring Solr to use an authentication or authorization plugin, you will need to upload a `security.json` file to ZooKeeper. The following command writes the file as it uploads it - you could also upload a file that you have already created locally. @@ -91,7 +88,6 @@ Depending on the authentication and authorization plugin that you use, you may h Once `security.json` has been uploaded to ZooKeeper, you should use the appropriate APIs for the plugins you're using to update it. You can edit it manually, but you must take care to remove any version data so it will be properly updated across all ZooKeeper nodes. The version data is found at the end of the `security.json` file, and will appear as the letter "v" followed by a number, such as `{"v":138}`. -[[AuthenticationandAuthorizationPlugins-InStandaloneMode]] === In Standalone Mode When running Solr in standalone mode, you need to create the `security.json` file and put it in the `$SOLR_HOME` directory for your installation (this is the same place you have located `solr.xml` and is usually `server/solr`). 
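As a minimal sketch (assuming a default installation layout where `$SOLR_HOME` is `server/solr`), placing a locally prepared `security.json` and restarting Solr might look like this:

[source,bash]
----
# Copy the file into the Solr home directory, then restart so the
# authentication and authorization plugins are loaded at startup.
cp /path/to/security.json server/solr/security.json
bin/solr restart
----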
@@ -100,8 +96,7 @@ If you are using <` during startup. -[[AuthenticationandAuthorizationPlugins-AvailableAuthenticationPlugins]] === Available Authentication Plugins Solr has the following implementations of authentication plugins: @@ -135,12 +128,10 @@ Solr has the following implementations of authentication plugins: * <> * <> -[[AuthenticationandAuthorizationPlugins-Authorization]] == Authorization An authorization plugin can be written for Solr by extending the {solr-javadocs}/solr-core/org/apache/solr/security/AuthorizationPlugin.html[AuthorizationPlugin] interface. -[[AuthenticationandAuthorizationPlugins-LoadingaCustomPlugin]] === Loading a Custom Plugin * Make sure that the plugin implementation is in the classpath. @@ -162,21 +153,16 @@ All of the content in the `authorization` block of `security.json` would be pass The authorization plugin is only supported in SolrCloud mode. Also, reloading the plugin isn't yet supported and requires a restart of the Solr installation (meaning, the JVM should be restarted, not simply a core reload). ==== -[[AuthenticationandAuthorizationPlugins-AvailableAuthorizationPlugins]] === Available Authorization Plugins Solr has one implementation of an authorization plugin: * <> -[[AuthenticationandAuthorizationPlugins-PKISecuringInter-NodeRequests]] - -[[AuthenticationandAuthorizationPlugins-PKI]] == Securing Inter-Node Requests There are a lot of requests that originate from the Solr nodes itself. For example, requests from overseer to nodes, recovery threads, etc. Each Authentication plugin declares whether it is capable of securing inter-node requests or not. If not, Solr will fall back to using a special internode authentication mechanism where each Solr node is a super user and is fully trusted by other Solr nodes, described below. -[[AuthenticationandAuthorizationPlugins-PKIAuthenticationPlugin]] === PKIAuthenticationPlugin The PKIAuthenticationPlugin is used when there is any request going on between two Solr nodes, and the configured Authentication plugin does not wish to handle inter-node security. diff --git a/solr/solr-ref-guide/src/basic-authentication-plugin.adoc b/solr/solr-ref-guide/src/basic-authentication-plugin.adoc index f7282160f83..2d196cff7ee 100644 --- a/solr/solr-ref-guide/src/basic-authentication-plugin.adoc +++ b/solr/solr-ref-guide/src/basic-authentication-plugin.adoc @@ -22,10 +22,9 @@ Solr can support Basic authentication for users with the use of the BasicAuthPlu An authorization plugin is also available to configure Solr with permissions to perform various activities in the system. The authorization plugin is described in the section <>. -[[BasicAuthenticationPlugin-EnableBasicAuthentication]] == Enable Basic Authentication -To use Basic authentication, you must first create a `security.json` file. This file and where to put it is described in detail in the section <>. +To use Basic authentication, you must first create a `security.json` file. This file and where to put it is described in detail in the section <>. For Basic authentication, the `security.json` file must have an `authentication` part which defines the class being used for authentication. Usernames and passwords (as a sha256(password+salt) hash) could be added when the file is created, or can be added later with the Basic authentication API, described below. @@ -68,7 +67,6 @@ If you are using SolrCloud, you must upload `security.json` to ZooKeeper. 
You ca bin/solr zk cp file:path_to_local_security.json zk:/security.json -z localhost:9983 ---- -[[BasicAuthenticationPlugin-Caveats]] === Caveats There are a few things to keep in mind when using the Basic authentication plugin. @@ -77,19 +75,16 @@ There are a few things to keep in mind when using the Basic authentication plugi * A user who has access to write permissions to `security.json` will be able to modify all the permissions and how users have been assigned permissions. Special care should be taken to only grant access to editing security to appropriate users. * Your network should, of course, be secure. Even with Basic authentication enabled, you should not unnecessarily expose Solr to the outside world. -[[BasicAuthenticationPlugin-EditingAuthenticationPluginConfiguration]] == Editing Authentication Plugin Configuration An Authentication API allows modifying user IDs and passwords. The API provides an endpoint with specific commands to set user details or delete a user. -[[BasicAuthenticationPlugin-APIEntryPoint]] === API Entry Point `admin/authentication` This endpoint is not collection-specific, so users are created for the entire Solr cluster. If users need to be restricted to a specific collection, that can be done with the authorization rules. -[[BasicAuthenticationPlugin-AddaUserorEditaPassword]] === Add a User or Edit a Password The `set-user` command allows you to add users and change their passwords. For example, the following defines two users and their passwords: @@ -101,7 +96,6 @@ curl --user solr:SolrRocks http://localhost:8983/solr/admin/authentication -H 'C "harry":"HarrysSecret"}}' ---- -[[BasicAuthenticationPlugin-DeleteaUser]] === Delete a User The `delete-user` command allows you to remove a user. The user password does not need to be sent to remove a user. In the following example, we've asked that user IDs 'tom' and 'harry' be removed from the system. @@ -112,7 +106,6 @@ curl --user solr:SolrRocks http://localhost:8983/solr/admin/authentication -H 'C "delete-user": ["tom","harry"]}' ---- -[[BasicAuthenticationPlugin-Setaproperty]] === Set a Property Set arbitrary properties for authentication plugin. The only supported property is `'blockUnknown'` @@ -123,7 +116,6 @@ curl --user solr:SolrRocks http://localhost:8983/solr/admin/authentication -H 'C "set-property": {"blockUnknown":false}}' ---- -[[BasicAuthenticationPlugin-UsingBasicAuthwithSolrJ]] === Using BasicAuth with SolrJ In SolrJ, the basic authentication credentials need to be set for each request as in this example: @@ -144,7 +136,6 @@ req.setBasicAuthCredentials(userName, password); QueryResponse rsp = req.process(solrClient); ---- -[[BasicAuthenticationPlugin-UsingCommandLinescriptswithBasicAuth]] === Using Command Line scripts with BasicAuth Add the following line to the `solr.in.sh` or `solr.in.cmd` file. This example tells the `bin/solr` command line to to use "basic" as the type of authentication, and to pass credentials with the user-name "solr" and password "SolrRocks": diff --git a/solr/solr-ref-guide/src/blob-store-api.adoc b/solr/solr-ref-guide/src/blob-store-api.adoc index 63297b9e7b0..267ed1db1ab 100644 --- a/solr/solr-ref-guide/src/blob-store-api.adoc +++ b/solr/solr-ref-guide/src/blob-store-api.adoc @@ -28,7 +28,6 @@ When using the blob store, note that the API does not delete or overwrite a prev The blob store API is implemented as a requestHandler. A special collection named ".system" is used to store the blobs. 
This collection can be created in advance, but if it does not exist it will be created automatically. -[[BlobStoreAPI-Aboutthe.systemCollection]] == About the .system Collection Before uploading blobs to the blob store, a special collection must be created and it must be named `.system`. Solr will automatically create this collection if it does not already exist, but you can also create it manually if you choose. @@ -46,7 +45,6 @@ curl http://localhost:8983/solr/admin/collections?action=CREATE&name=.system&rep IMPORTANT: The `bin/solr` script cannot be used to create the `.system` collection. -[[BlobStoreAPI-UploadFilestoBlobStore]] == Upload Files to Blob Store After the `.system` collection has been created, files can be uploaded to the blob store with a request similar to the following: @@ -132,7 +130,6 @@ For the latest version of a blob, the \{version} can be omitted, curl http://localhost:8983/solr/.system/blob/{blobname}?wt=filestream > {outputfilename} ---- -[[BlobStoreAPI-UseaBlobinaHandlerorComponent]] == Use a Blob in a Handler or Component To use the blob as the class for a request handler or search component, you create a request handler in `solrconfig.xml` as usual. You will need to define the following parameters: diff --git a/solr/solr-ref-guide/src/blockjoin-faceting.adoc b/solr/solr-ref-guide/src/blockjoin-faceting.adoc index 1a89a570071..7f057f007c1 100644 --- a/solr/solr-ref-guide/src/blockjoin-faceting.adoc +++ b/solr/solr-ref-guide/src/blockjoin-faceting.adoc @@ -42,7 +42,7 @@ This example shows how you could add this search components to `solrconfig.xml` This component can be added into any search request handler. This component work with distributed search in SolrCloud mode. -Documents should be added in children-parent blocks as described in <>. Examples: +Documents should be added in children-parent blocks as described in <>. Examples: .Sample document [source,xml] @@ -95,7 +95,7 @@ Documents should be added in children-parent blocks as described in < ---- -Queries are constructed the same way as for a <>. For example: +Queries are constructed the same way as for a <>. For example: [source,text] ---- diff --git a/solr/solr-ref-guide/src/charfilterfactories.adoc b/solr/solr-ref-guide/src/charfilterfactories.adoc index 6010a319f57..8f0dd0fb793 100644 --- a/solr/solr-ref-guide/src/charfilterfactories.adoc +++ b/solr/solr-ref-guide/src/charfilterfactories.adoc @@ -22,7 +22,6 @@ CharFilter is a component that pre-processes input characters. CharFilters can be chained like Token Filters and placed in front of a Tokenizer. CharFilters can add, change, or remove characters while preserving the original character offsets to support features like highlighting. -[[CharFilterFactories-solr.MappingCharFilterFactory]] == solr.MappingCharFilterFactory This filter creates `org.apache.lucene.analysis.MappingCharFilter`, which can be used for changing one string to another (for example, for normalizing `é` to `e`.). @@ -65,7 +64,6 @@ Mapping file syntax: |=== ** A backslash followed by any other character is interpreted as if the character were present without the backslash. -[[CharFilterFactories-solr.HTMLStripCharFilterFactory]] == solr.HTMLStripCharFilterFactory This filter creates `org.apache.solr.analysis.HTMLStripCharFilter`. This CharFilter strips HTML from the input stream and passes the result to another CharFilter or a Tokenizer. 
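The effect of this char filter can be checked from the command line with the Field Analysis API. The example below is a sketch only: it assumes the `techproducts` example running locally and a hypothetical field type named `text_html` whose analyzer chain includes `solr.HTMLStripCharFilterFactory`.

[source,bash]
----
# The HTML markup should be removed before tokenization, leaving only
# the text content ("my link") in the resulting token stream.
curl "http://localhost:8983/solr/techproducts/analysis/field" \
  --data-urlencode "analysis.fieldtype=text_html" \
  --data-urlencode "analysis.fieldvalue=<a href=\"http://example.com\">my link</a>" \
  --data-urlencode "wt=json"
----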
@@ -114,7 +112,6 @@ Example: ---- -[[CharFilterFactories-solr.ICUNormalizer2CharFilterFactory]] == solr.ICUNormalizer2CharFilterFactory This filter performs pre-tokenization Unicode normalization using http://site.icu-project.org[ICU4J]. @@ -138,7 +135,6 @@ Example: ---- -[[CharFilterFactories-solr.PatternReplaceCharFilterFactory]] == solr.PatternReplaceCharFilterFactory This filter uses http://www.regular-expressions.info/reference.html[regular expressions] to replace or change character patterns. diff --git a/solr/solr-ref-guide/src/collapse-and-expand-results.adoc b/solr/solr-ref-guide/src/collapse-and-expand-results.adoc index 106fd1c31d5..0c0bbd10033 100644 --- a/solr/solr-ref-guide/src/collapse-and-expand-results.adoc +++ b/solr/solr-ref-guide/src/collapse-and-expand-results.adoc @@ -24,10 +24,9 @@ The Collapsing query parser groups documents (collapsing the result set) accordi [IMPORTANT] ==== -In order to use these features with SolrCloud, the documents must be located on the same shard. To ensure document co-location, you can define the `router.name` parameter as `compositeId` when creating the collection. For more information on this option, see the section <>. +In order to use these features with SolrCloud, the documents must be located on the same shard. To ensure document co-location, you can define the `router.name` parameter as `compositeId` when creating the collection. For more information on this option, see the section <>. ==== -[[CollapseandExpandResults-CollapsingQueryParser]] == Collapsing Query Parser The `CollapsingQParser` is really a _post filter_ that provides more performant field collapsing than Solr's standard approach when the number of distinct groups in the result set is high. This parser collapses the result set to a single document per group before it forwards the result set to the rest of the search components. So all downstream components (faceting, highlighting, etc...) will work with the collapsed result set. @@ -121,7 +120,6 @@ fq={!collapse field=group_field hint=top_fc} The CollapsingQParserPlugin fully supports the QueryElevationComponent. -[[CollapseandExpandResults-ExpandComponent]] == Expand Component The ExpandComponent can be used to expand the groups that were collapsed by the http://heliosearch.org/the-collapsingqparserplugin-solrs-new-high-performance-field-collapsing-postfilter/[CollapsingQParserPlugin]. diff --git a/solr/solr-ref-guide/src/collections-api.adoc b/solr/solr-ref-guide/src/collections-api.adoc index 3a43d396fe0..662c5fb4cf6 100644 --- a/solr/solr-ref-guide/src/collections-api.adoc +++ b/solr/solr-ref-guide/src/collections-api.adoc @@ -24,7 +24,7 @@ The Collections API is used to create, remove, or reload collections. In the context of SolrCloud you can use it to create collections with a specific number of shards and replicas, move replicas or shards, and create or delete collection aliases. -[[CollectionsAPI-create]] +[[create]] == CREATE: Create a Collection `/admin/collections?action=CREATE&name=_name_` @@ -45,7 +45,7 @@ The `compositeId` router hashes the value in the uniqueKey field and looks up th + When using the `implicit` router, the `shards` parameter is required. When using the `compositeId` router, the `numShards` parameter is required. + -For more information, see also the section <>. +For more information, see also the section <>. `numShards`:: The number of shards to be created as part of the collection. This is a required parameter when the `router.name` is `compositeId`. 
@@ -68,7 +68,7 @@ Allows defining the nodes to spread the new collection across. The format is a c + If not provided, the CREATE operation will create shard-replicas spread across all live Solr nodes. + -Alternatively, use the special value of `EMPTY` to initially create no shard-replica within the new collection and then later use the <> operation to add shard-replicas when and where required. +Alternatively, use the special value of `EMPTY` to initially create no shard-replica within the new collection and then later use the <> operation to add shard-replicas when and where required. `createNodeSet.shuffle`:: Controls wether or not the shard-replicas created for this collection will be assigned to the nodes specified by the `createNodeSet` in a sequential manner, or if the list of nodes should be shuffled prior to creating individual replicas. @@ -89,10 +89,10 @@ Please note that <> or retrieval by Set core property _name_ to _value_. See the section <> for details on supported properties and values. `autoAddReplicas`:: -When set to `true`, enables automatic addition of replicas on shared file systems (such as HDFS) only. See the section <> for more details on settings and overrides. The default is `false`. +When set to `true`, enables automatic addition of replicas on shared file systems (such as HDFS) only. See the section <> for more details on settings and overrides. The default is `false`. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. `rule`:: Replica placement rules. See the section <> for details. @@ -141,7 +141,7 @@ http://localhost:8983/solr/admin/collections?action=CREATE&name=newCollection&nu ---- -[[CollectionsAPI-modifycollection]] +[[modifycollection]] == MODIFYCOLLECTION: Modify Attributes of a Collection `/admin/collections?action=MODIFYCOLLECTION&collection=_&=&=_` @@ -165,10 +165,9 @@ The attributes that can be modified are: * rule * snitch + -See the <> section above for details on these attributes. +See the <> section above for details on these attributes. - -[[CollectionsAPI-reload]] +[[reload]] == RELOAD: Reload a Collection `/admin/collections?action=RELOAD&name=_name_` @@ -177,11 +176,11 @@ The RELOAD action is used when you have changed a configuration in ZooKeeper. === RELOAD Parameters -|`name`:: +`name`:: The name of the collection to reload. This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === RELOAD Response @@ -222,7 +221,7 @@ http://localhost:8983/solr/admin/collections?action=RELOAD&name=newCollection ---- -[[CollectionsAPI-splitshard]] +[[splitshard]] == SPLITSHARD: Split a Shard `/admin/collections?action=SPLITSHARD&collection=_name_&shard=_shardID_` @@ -233,7 +232,7 @@ This command allows for seamless splitting and requires no downtime. A shard bei The split is performed by dividing the original shard's hash range into two equal partitions and dividing up the documents in the original shard according to the new sub-ranges. Two parameters discussed below, `ranges` and `split.key` provide further control over how the split occurs. -Shard splitting can be a long running process. In order to avoid timeouts, you should run this as an <>. +Shard splitting can be a long running process. In order to avoid timeouts, you should run this as an <>. === SPLITSHARD Parameters @@ -259,7 +258,7 @@ For example, suppose `split.key=A!` hashes to the range `12-15` and belongs to s Set core property _name_ to _value_. 
See the section <> for details on supported properties and values. `async`:: -Request ID to track this action which will be <> +Request ID to track this action which will be <> === SPLITSHARD Response @@ -338,7 +337,7 @@ http://localhost:8983/solr/admin/collections?action=SPLITSHARD&collection=anothe ---- -[[CollectionsAPI-createshard]] +[[createshard]] == CREATESHARD: Create a Shard Shards can only created with this API for collections that use the 'implicit' router (i.e., when the collection was created, `router.name=implicit`). A new shard with a name can be created for an existing 'implicit' collection. @@ -364,7 +363,7 @@ The format is a comma-separated list of node_names, such as `localhost:8983_solr Set core property _name_ to _value_. See the section <> for details on supported properties and values. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === CREATESHARD Response @@ -393,7 +392,7 @@ http://localhost:8983/solr/admin/collections?action=CREATESHARD&collection=anImp ---- -[[CollectionsAPI-deleteshard]] +[[deleteshard]] == DELETESHARD: Delete a Shard Deleting a shard will unload all replicas of the shard, remove them from `clusterstate.json`, and (by default) delete the instanceDir and dataDir for each replica. It will only remove shards that are inactive, or which have no range given for custom sharding. @@ -418,7 +417,7 @@ By default Solr will delete the dataDir of each replica that is deleted. Set thi By default Solr will delete the index of each replica that is deleted. Set this to `false` to prevent the index directory from being deleted. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === DELETESHARD Response @@ -455,7 +454,7 @@ http://localhost:8983/solr/admin/collections?action=DELETESHARD&collection=anoth ---- -[[CollectionsAPI-createalias]] +[[createalias]] == CREATEALIAS: Create or Modify an Alias for a Collection The `CREATEALIAS` action will create a new alias pointing to one or more collections. If an alias by the same name already exists, this action will replace the existing alias, effectively acting like an atomic "MOVE" command. @@ -471,14 +470,12 @@ The alias name to be created. This parameter is required. A comma-separated list of collections to be aliased. The collections must already exist in the cluster. This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. -[[CollectionsAPI-Output.5]] === CREATEALIAS Response The output will simply be a responseHeader with details of the time it took to process the request. To confirm the creation of the alias, you can look in the Solr Admin UI, under the Cloud section and find the `aliases.json` file. -[[CollectionsAPI-Examples.5]] === Examples using CREATEALIAS *Input* @@ -502,7 +499,7 @@ http://localhost:8983/solr/admin/collections?action=CREATEALIAS&name=testalias&c ---- -[[CollectionsAPI-listaliases]] +[[listaliases]] == LISTALIASES: List of all aliases in the cluster `/admin/collections?action=LISTALIASES` @@ -531,7 +528,7 @@ The output will contain a list of aliases with the corresponding collection name ---- -[[CollectionsAPI-deletealias]] +[[deletealias]] == DELETEALIAS: Delete a Collection Alias `/admin/collections?action=DELETEALIAS&name=_name_` @@ -542,7 +539,7 @@ The output will contain a list of aliases with the corresponding collection name The name of the alias to delete. 
This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === DELETEALIAS Response @@ -571,7 +568,7 @@ http://localhost:8983/solr/admin/collections?action=DELETEALIAS&name=testalias ---- -[[CollectionsAPI-delete]] +[[delete]] == DELETE: Delete a Collection `/admin/collections?action=DELETE&name=_collection_` @@ -582,7 +579,7 @@ http://localhost:8983/solr/admin/collections?action=DELETEALIAS&name=testalias The name of the collection to delete. This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === DELETE Response @@ -625,7 +622,7 @@ http://localhost:8983/solr/admin/collections?action=DELETE&name=newCollection ---- -[[CollectionsAPI-deletereplica]] +[[deletereplica]] == DELETEREPLICA: Delete a Replica Deletes a named replica from the specified collection and shard. @@ -665,7 +662,7 @@ By default Solr will delete the index of the replica that is deleted. Set this t When set to `true`, no action will be taken if the replica is active. Default `false`. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === Examples using DELETEREPLICA @@ -688,7 +685,7 @@ http://localhost:8983/solr/admin/collections?action=DELETEREPLICA&collection=tes ---- -[[CollectionsAPI-addreplica]] +[[addreplica]] == ADDREPLICA: Add Replica Add a replica to a shard in a collection. The node name can be specified if the replica is to be created in a specific node. @@ -722,7 +719,8 @@ The directory in which the core should be created `property._name_=_value_`:: Set core property _name_ to _value_. See <> for details about supported properties and values. -`async`:: string |No |Request ID to track this action which will be <> +`async`:: +Request ID to track this action which will be <> === Examples using ADDREPLICA @@ -754,7 +752,7 @@ http://localhost:8983/solr/admin/collections?action=ADDREPLICA&collection=test2& ---- -[[CollectionsAPI-clusterprop]] +[[clusterprop]] == CLUSTERPROP: Cluster Properties Add, edit or delete a cluster-wide property. @@ -794,7 +792,7 @@ http://localhost:8983/solr/admin/collections?action=CLUSTERPROP&name=urlScheme&v ---- -[[CollectionsAPI-migrate]] +[[migrate]] == MIGRATE: Migrate Documents to Another Collection `/admin/collections?action=MIGRATE&collection=_name_&split.key=_key1!_&target.collection=_target_collection_&forward.timeout=60` @@ -827,7 +825,7 @@ The timeout, in seconds, until which write requests made to the source collectio Set core property _name_ to _value_. See the section <> for details on supported properties and values. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. === MIGRATE Response @@ -988,7 +986,7 @@ http://localhost:8983/solr/admin/collections?action=MIGRATE&collection=test1&spl ---- -[[CollectionsAPI-addrole]] +[[addrole]] == ADDROLE: Add a Role `/admin/collections?action=ADDROLE&role=_roleName_&node=_nodeName_` @@ -1003,7 +1001,7 @@ Use this command to dedicate a particular node as Overseer. Invoke it multiple t The name of the role. The only supported role as of now is `overseer`. This parameter is required. `node`:: -|The name of the node that will be assigned the role. It is possible to assign a role even before that node is started. This parameter is started. +The name of the node that will be assigned the role. 
It is possible to assign a role even before that node is started. This parameter is started. === ADDROLE Response @@ -1030,7 +1028,7 @@ http://localhost:8983/solr/admin/collections?action=ADDROLE&role=overseer&node=1 ---- -[[CollectionsAPI-removerole]] +[[removerole]] == REMOVEROLE: Remove Role Remove an assigned role. This API is used to undo the roles assigned using ADDROLE operation @@ -1046,7 +1044,6 @@ The name of the role. The only supported role as of now is `overseer`. This para The name of the node where the role should be removed. -[[CollectionsAPI-Output.11]] === REMOVEROLE Response The response will include the status of the request and the properties that were updated or removed. If the status is anything other than "0", an error message will explain why the request failed. @@ -1072,7 +1069,7 @@ http://localhost:8983/solr/admin/collections?action=REMOVEROLE&role=overseer&nod ---- -[[CollectionsAPI-overseerstatus]] +[[overseerstatus]] == OVERSEERSTATUS: Overseer Status and Statistics Returns the current status of the overseer, performance statistics of various overseer APIs, and the last 10 failures per operation type. @@ -1146,7 +1143,7 @@ http://localhost:8983/solr/admin/collections?action=OVERSEERSTATUS&wt=json } ---- -[[CollectionsAPI-clusterstatus]] +[[clusterstatus]] == CLUSTERSTATUS: Cluster Status Fetch the cluster status including collections, shards, replicas, configuration name as well as collection aliases and cluster properties. @@ -1168,7 +1165,6 @@ This can be used if you need the details of the shard where a particular documen The response will include the status of the request and the status of the cluster. -[[CollectionsAPI-Examples.15]] === Examples using CLUSTERSTATUS *Input* @@ -1247,10 +1243,10 @@ http://localhost:8983/solr/admin/collections?action=clusterstatus&wt=json } ---- -[[CollectionsAPI-requeststatus]] +[[requeststatus]] == REQUESTSTATUS: Request Status of an Async Call -Request the status and response of an already submitted <> (below) call. This call is also used to clear up the stored statuses. +Request the status and response of an already submitted <> (below) call. This call is also used to clear up the stored statuses. `/admin/collections?action=REQUESTSTATUS&requestid=_request-id_` @@ -1307,10 +1303,10 @@ http://localhost:8983/solr/admin/collections?action=REQUESTSTATUS&requestid=1004 ---- -[[CollectionsAPI-deletestatus]] +[[deletestatus]] == DELETESTATUS: Delete Status -Deletes the stored response of an already failed or completed <> call. +Deletes the stored response of an already failed or completed <> call. `/admin/collections?action=DELETESTATUS&requestid=_request-id_` @@ -1384,7 +1380,7 @@ http://localhost:8983/solr/admin/collections?action=DELETESTATUS&flush=true ---- -[[CollectionsAPI-list]] +[[list]] == LIST: List Collections Fetch the names of the collections in the cluster. @@ -1413,7 +1409,7 @@ http://localhost:8983/solr/admin/collections?action=LIST&wt=json "example2"]} ---- -[[CollectionsAPI-addreplicaprop]] +[[addreplicaprop]] == ADDREPLICAPROP: Add Replica Property Assign an arbitrary property to a particular replica and give it the value specified. If the property already exists, it will be overwritten with the new value. 
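A common use of this API is assigning the `preferredLeader` property to a replica before running REBALANCELEADERS. The commands below are a sketch only; the collection, shard, and replica names are placeholders to be replaced with values from your own cluster.

[source,bash]
----
# Mark one replica as the preferred leader for its shard ...
curl "http://localhost:8983/solr/admin/collections?action=ADDREPLICAPROP&collection=collection1&shard=shard1&replica=core_node2&property=preferredLeader&property.value=true"

# ... then confirm the property shows up in the cluster state.
curl "http://localhost:8983/solr/admin/collections?action=CLUSTERSTATUS&collection=collection1&wt=json"
----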
@@ -1501,7 +1497,7 @@ http://localhost:8983/solr/admin/collections?action=ADDREPLICAPROP&shard=shard1& http://localhost:8983/solr/admin/collections?action=ADDREPLICAPROP&shard=shard1&collection=collection1&replica=core_node3&property=testprop&property.value=value2&shardUnique=true ---- -[[CollectionsAPI-deletereplicaprop]] +[[deletereplicaprop]] == DELETEREPLICAPROP: Delete Replica Property Deletes an arbitrary property from a particular replica. @@ -1555,7 +1551,7 @@ http://localhost:8983/solr/admin/collections?action=DELETEREPLICAPROP&shard=shar ---- -[[CollectionsAPI-balanceshardunique]] +[[balanceshardunique]] == BALANCESHARDUNIQUE: Balance a Property Across Nodes `/admin/collections?action=BALANCESHARDUNIQUE&collection=_collectionName_&property=_propertyName_` @@ -1607,7 +1603,7 @@ http://localhost:8983/solr/admin/collections?action=BALANCESHARDUNIQUE&collectio Examining the clusterstate after issuing this call should show exactly one replica in each shard that has this property. -[[CollectionsAPI-rebalanceleaders]] +[[rebalanceleaders]] == REBALANCELEADERS: Rebalance Leaders Reassigns leaders in a collection according to the preferredLeader property across active nodes. @@ -1709,10 +1705,7 @@ The replica in the "inactivePreferreds" section had the `preferredLeader` proper Examining the clusterstate after issuing this call should show that every live node that has the `preferredLeader` property should also have the "leader" property set to _true_. - -[[CollectionsAPI-FORCELEADER_ForceShardLeader]] - -[[CollectionsAPI-forceleader]] +[[forceleader]] == FORCELEADER: Force Shard Leader In the unlikely event of a shard losing its leader, this command can be invoked to force the election of a new leader. @@ -1729,7 +1722,7 @@ The name of the shard where leader election should occur. This parameter is requ WARNING: This is an expert level command, and should be invoked only when regular leader election is not working. This may potentially lead to loss of data in the event that the new leader doesn't have certain updates, possibly recent ones, which were acknowledged by the old leader before going down. -[[CollectionsAPI-migratestateformat]] +[[migratestateformat]] == MIGRATESTATEFORMAT: Migrate Cluster State A expert level utility API to move a collection from shared `clusterstate.json` zookeeper node (created with `stateFormat=1`, the default in all Solr releases prior to 5.0) to the per-collection `state.json` stored in ZooKeeper (created with `stateFormat=2`, the current default) seamlessly without any application down-time. @@ -1742,11 +1735,11 @@ A expert level utility API to move a collection from shared `clusterstate.json` The name of the collection to be migrated from `clusterstate.json` to its own `state.json` ZooKeeper node. This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. This API is useful in migrating any collections created prior to Solr 5.0 to the more scalable cluster state format now used by default. If a collection was created in any Solr 5.x version or higher, then executing this command is not necessary. -[[CollectionsAPI-backup]] +[[backup]] == BACKUP: Backup Collection Backs up Solr collections and associated configurations to a shared filesystem - for example a Network File System. @@ -1761,15 +1754,15 @@ The BACKUP command will backup Solr indexes and configurations for a specified c The name of the collection to be backed up. This parameter is required. 
`location`:: -The location on a shared drive for the backup command to write to. Alternately it can be set as a <>. +The location on a shared drive for the backup command to write to. Alternately it can be set as a <>. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. `repository`:: The name of a repository to be used for the backup. If no repository is specified then the local filesystem repository will be used automatically. -[[CollectionsAPI-restore]] +[[restore]] == RESTORE: Restore Collection Restores Solr indexes and associated configurations. @@ -1782,7 +1775,7 @@ The collection created will be have the same number of shards and replicas as th While restoring, if a configSet with the same name exists in ZooKeeper then Solr will reuse that, or else it will upload the backed up configSet in ZooKeeper and use that. -You can use the collection <> command to make sure clients don't need to change the endpoint to query or index against the newly restored collection. +You can use the collection <> command to make sure clients don't need to change the endpoint to query or index against the newly restored collection. === RESTORE Parameters @@ -1790,10 +1783,10 @@ You can use the collection <> command to The collection where the indexes will be restored into. This parameter is required. `location`:: -The location on a shared drive for the RESTORE command to read from. Alternately it can be set as a <>. +The location on a shared drive for the RESTORE command to read from. Alternately it can be set as a <>. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. `repository`:: The name of a repository to be used for the backup. If no repository is specified then the local filesystem repository will be used automatically. @@ -1814,12 +1807,11 @@ When creating collections, the shards and/or replicas are spread across all avai If a node is not live when the CREATE operation is called, it will not get any parts of the new collection, which could lead to too many replicas being created on a single live node. Defining `maxShardsPerNode` sets a limit on the number of replicas CREATE will spread to each node. If the entire collection can not be fit into the live nodes, no collection will be created at all. `autoAddReplicas`:: -When set to `true`, enables auto addition of replicas on shared file systems. See the section <> for more details on settings and overrides. +When set to `true`, enables auto addition of replicas on shared file systems. See the section <> for more details on settings and overrides. `property._name_=_value_`:: Set core property _name_ to _value_. See the section <> for details on supported properties and values. -[[CollectionsAPI-deletenode]] == DELETENODE: Delete Replicas in a Node Deletes all replicas of all collections in that node. Please note that the node itself will remain as a live node after this operation. @@ -1828,12 +1820,12 @@ Deletes all replicas of all collections in that node. Please note that the node === DELETENODE Parameters -`node`:: string |Yes |The node to be removed. This parameter is required. +`node`:: +The node to be removed. This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. -[[CollectionsAPI-replacenode]] == REPLACENODE: Move All Replicas in a Node to Another This command recreates replicas in one node (the source) to another node (the target). 
After each replica is copied, the replicas in the source node are deleted. @@ -1854,7 +1846,7 @@ The target node where replicas will be copied. This parameter is required. If this flag is set to `true`, all replicas are created in separate threads. Keep in mind that this can lead to very high network and disk I/O if the replicas have very large indices. The default is `false`. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. `timeout`:: Time in seconds to wait until new replicas are created, and until leader replicas are fully recovered. The default is `300`, or 5 minutes. @@ -1864,7 +1856,6 @@ Time in seconds to wait until new replicas are created, and until leader replica This operation does not hold necessary locks on the replicas that belong to on the source node. So don't perform other collection operations in this period. ==== -[[CollectionsAPI-movereplica]] == MOVEREPLICA: Move a Replica to a New Node This command moves a replica from one node to a new node. In case of shared filesystems the `dataDir` will be reused. @@ -1889,12 +1880,11 @@ The name of the node that contains the replica. This parameter is required. The name of the destination node. This parameter is required. `async`:: -Request ID to track this action which will be <>. +Request ID to track this action which will be <>. -[[CollectionsAPI-async]] == Asynchronous Calls -Since some collection API calls can be long running tasks (such as SPLITSHARD), you can optionally have the calls run asynchronously. Specifying `async=` enables you to make an asynchronous call, the status of which can be requested using the <> call at any time. +Since some collection API calls can be long running tasks (such as SPLITSHARD), you can optionally have the calls run asynchronously. Specifying `async=` enables you to make an asynchronous call, the status of which can be requested using the <> call at any time. As of now, REQUESTSTATUS does not automatically clean up the tracking data structures, meaning the status of completed or failed tasks stays stored in ZooKeeper unless cleared manually. DELETESTATUS can be used to clear the stored statuses. However, there is a limit of 10,000 on the number of async call responses stored in a cluster. diff --git a/solr/solr-ref-guide/src/collections-core-admin.adoc b/solr/solr-ref-guide/src/collections-core-admin.adoc index fad75601a36..77d66cf9ffb 100644 --- a/solr/solr-ref-guide/src/collections-core-admin.adoc +++ b/solr/solr-ref-guide/src/collections-core-admin.adoc @@ -36,6 +36,6 @@ image::images/collections-core-admin/collection-admin.png[image,width=653,height Replicas can be deleted by clicking the red "X" next to the replica name. -If the shard is inactive, for example after a <>, an option to delete the shard will appear as a red "X" next to the shard name. +If the shard is inactive, for example after a <>, an option to delete the shard will appear as a red "X" next to the shard name. image::images/collections-core-admin/DeleteShard.png[image,width=486,height=250] diff --git a/solr/solr-ref-guide/src/command-line-utilities.adoc b/solr/solr-ref-guide/src/command-line-utilities.adoc index e927f02f35e..294a1bcac90 100644 --- a/solr/solr-ref-guide/src/command-line-utilities.adoc +++ b/solr/solr-ref-guide/src/command-line-utilities.adoc @@ -36,7 +36,6 @@ The `zkcli.sh` provided by Solr is not the same as the https://zookeeper.apache. 
ZooKeeper's `zkCli.sh` provides a completely general, application-agnostic shell for manipulating data in ZooKeeper. Solr's `zkcli.sh` – discussed in this section – is specific to Solr, and has command line arguments specific to dealing with Solr data in ZooKeeper. ==== -[[CommandLineUtilities-UsingSolr_sZooKeeperCLI]] == Using Solr's ZooKeeper CLI Use the `help` option to get a list of available commands from the script itself, as in `./server/scripts/cloud-scrips/zkcli.sh help`. @@ -91,23 +90,20 @@ The short form parameter options may be specified with a single dash (eg: `-c my The long form parameter options may be specified using either a single dash (eg: `-collection mycollection`) or a double dash (eg: `--collection mycollection`) ==== -[[CommandLineUtilities-ZooKeeperCLIExamples]] == ZooKeeper CLI Examples Below are some examples of using the `zkcli.sh` CLI which assume you have already started the SolrCloud example (`bin/solr -e cloud -noprompt`) If you are on Windows machine, simply replace `zkcli.sh` with `zkcli.bat` in these examples. -[[CommandLineUtilities-Uploadaconfigurationdirectory]] -=== Upload a configuration directory +=== Upload a Configuration Directory [source,bash] ---- ./server/scripts/cloud-scripts/zkcli.sh -zkhost 127.0.0.1:9983 -cmd upconfig -confname my_new_config -confdir server/solr/configsets/_default/conf ---- -[[CommandLineUtilities-BootstrapZooKeeperfromexistingSOLR_HOME]] -=== Bootstrap ZooKeeper from existing SOLR_HOME +=== Bootstrap ZooKeeper from an Existing solr.home [source,bash] ---- @@ -120,32 +116,28 @@ If you are on Windows machine, simply replace `zkcli.sh` with `zkcli.bat` in the Using the boostrap command with a zookeeper chroot in the `-zkhost` parameter, e.g. `-zkhost 127.0.0.1:2181/solr`, will automatically create the chroot path before uploading the configs. ==== -[[CommandLineUtilities-PutarbitrarydataintoanewZooKeeperfile]] -=== Put arbitrary data into a new ZooKeeper file +=== Put Arbitrary Data into a New ZooKeeper file [source,bash] ---- ./server/scripts/cloud-scripts/zkcli.sh -zkhost 127.0.0.1:9983 -cmd put /my_zk_file.txt 'some data' ---- -[[CommandLineUtilities-PutalocalfileintoanewZooKeeperfile]] -=== Put a local file into a new ZooKeeper file +=== Put a Local File into a New ZooKeeper File [source,bash] ---- ./server/scripts/cloud-scripts/zkcli.sh -zkhost 127.0.0.1:9983 -cmd putfile /my_zk_file.txt /tmp/my_local_file.txt ---- -[[CommandLineUtilities-Linkacollectiontoaconfigurationset]] -=== Link a collection to a configuration set +=== Link a Collection to a ConfigSet [source,bash] ---- ./server/scripts/cloud-scripts/zkcli.sh -zkhost 127.0.0.1:9983 -cmd linkconfig -collection gettingstarted -confname my_new_config ---- -[[CommandLineUtilities-CreateanewZooKeeperpath]] -=== Create a new ZooKeeper path +=== Create a New ZooKeeper Path This can be useful to create a chroot path in ZooKeeper before first cluster start. @@ -154,13 +146,11 @@ This can be useful to create a chroot path in ZooKeeper before first cluster sta ./server/scripts/cloud-scripts/zkcli.sh -zkhost 127.0.0.1:2181 -cmd makepath /solr ---- - -[[CommandLineUtilities-Setaclusterproperty]] -=== Set a cluster property +=== Set a Cluster Property This command will add or modify a single cluster property in `clusterprops.json`. Use this command instead of the usual getfile \-> edit \-> putfile cycle. -Unlike the CLUSTERPROP command on the <>, this command does *not* require a running Solr cluster. 
+Unlike the CLUSTERPROP command on the <>, this command does *not* require a running Solr cluster. [source,bash] ---- diff --git a/solr/solr-ref-guide/src/common-query-parameters.adoc b/solr/solr-ref-guide/src/common-query-parameters.adoc index 826cbe2cb36..1eea0807d46 100644 --- a/solr/solr-ref-guide/src/common-query-parameters.adoc +++ b/solr/solr-ref-guide/src/common-query-parameters.adoc @@ -20,7 +20,7 @@ Several query parsers share supported query parameters. -The table below summarizes Solr's common query parameters, which are supported by the <> +The table below summarizes Solr's common query parameters, which are supported by the <> // TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed @@ -249,7 +249,7 @@ As this check is periodically performed, the actual time for which a request can This parameter may be set to either true or false. -If set to true, and if <> for this collection is a {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] which uses a `sort` option which is compatible with <> specified for this query, then Solr will attempt to use an {lucene-javadocs}/core/org/apache/lucene/search/EarlyTerminatingSortingCollector.html[`EarlyTerminatingSortingCollector`]. +If set to true, and if <> for this collection is a {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] which uses a `sort` option which is compatible with <> specified for this query, then Solr will attempt to use an {lucene-javadocs}/core/org/apache/lucene/search/EarlyTerminatingSortingCollector.html[`EarlyTerminatingSortingCollector`]. If early termination is used, a `segmentTerminatedEarly` header will be included in the `responseHeader`. diff --git a/solr/solr-ref-guide/src/config-api.adoc b/solr/solr-ref-guide/src/config-api.adoc index 8f2d23b806d..00db2815f64 100644 --- a/solr/solr-ref-guide/src/config-api.adoc +++ b/solr/solr-ref-guide/src/config-api.adoc @@ -24,15 +24,13 @@ This feature is enabled by default and works similarly in both SolrCloud and sta When using this API, `solrconfig.xml` is not changed. Instead, all edited configuration is stored in a file called `configoverlay.json`. The values in `configoverlay.json` override the values in `solrconfig.xml`. -[[ConfigAPI-APIEntryPoints]] -== API Entry Points +== Config API Entry Points * `/config`: retrieve or modify the config. GET to retrieve and POST for executing commands * `/config/overlay`: retrieve the details in the `configoverlay.json` alone * `/config/params` : allows creating parameter sets that can override or take the place of parameters defined in `solrconfig.xml`. See the <> section for more details. -[[ConfigAPI-Retrievingtheconfig]] -== Retrieving the config +== Retrieving the Config All configuration items, can be retrieved by sending a GET request to the `/config` endpoint - the results will be the effective configuration resulting from merging settings in `configoverlay.json` with those in `solrconfig.xml`: @@ -55,18 +53,16 @@ To further restrict returned results to a single component within a top level se curl http://localhost:8983/solr/techproducts/config/requestHandler?componentName=/select ---- -[[ConfigAPI-Commandstomodifytheconfig]] -== Commands to modify the config +== Commands to Modify the Config This API uses specific commands to tell Solr what property or type of property to add to `configoverlay.json`. 
The commands are passed as part of the data sent with the request. The config commands are categorized into 3 different sections which manipulate various data structures in `solrconfig.xml`. Each of these is described below. -* <> -* <> -* <> +* <> +* <> +* <> -[[ConfigAPI-CommandsforCommonProperties]] === Commands for Common Properties The common properties are those that are frequently need to be customized in a Solr instance. They are manipulated with two commands: @@ -120,7 +116,6 @@ The properties that are configured with these commands are predefined and listed * `requestDispatcher.requestParsers.enableStreamBody` * `requestDispatcher.requestParsers.addHttpRequestToContext` -[[ConfigAPI-CommandsforCustomHandlersandLocalComponents]] === Commands for Custom Handlers and Local Components Custom request handlers, search components, and other types of localized Solr components (such as custom query parsers, update processors, etc.) can be added, updated and deleted with specific commands for the component being modified. @@ -133,7 +128,6 @@ Settings removed from `configoverlay.json` are not removed from `solrconfig.xml` The full list of available commands follows below: -[[ConfigAPI-GeneralPurposeCommands]] ==== General Purpose Commands These commands are the most commonly used: @@ -151,7 +145,6 @@ These commands are the most commonly used: * `update-queryresponsewriter` * `delete-queryresponsewriter` -[[ConfigAPI-AdvancedCommands]] ==== Advanced Commands These commands allow registering more advanced customizations to Solr: @@ -179,9 +172,8 @@ These commands allow registering more advanced customizations to Solr: * `update-runtimelib` * `delete-runtimelib` -See the section <> below for examples of using these commands. +See the section <> below for examples of using these commands. -[[ConfigAPI-Whatabout_updateRequestProcessorChain_]] ==== What about updateRequestProcessorChain? The Config API does not let you create or edit `updateRequestProcessorChain` elements. However, it is possible to create `updateProcessor` entries and can use them by name to create a chain. @@ -198,7 +190,6 @@ curl http://localhost:8983/solr/techproducts/config -H 'Content-type:application You can use this directly in your request by adding a parameter in the `updateRequestProcessorChain` for the specific update processor called `processor=firstFld`. -[[ConfigAPI-CommandsforUser-DefinedProperties]] === Commands for User-Defined Properties Solr lets users templatize the `solrconfig.xml` using the place holder format `${variable_name:default_val}`. You could set the values using system properties, for example, `-Dvariable_name= my_customvalue`. The same can be achieved during runtime using these commands: @@ -208,11 +199,10 @@ Solr lets users templatize the `solrconfig.xml` using the place holder format `$ The structure of the request is similar to the structure of requests using other commands, in the format of `"command":{"variable_name":"property_value"}`. You can add more than one variable at a time if necessary. -For more information about user-defined properties, see the section <>. +For more information about user-defined properties, see the section <>. -See also the section <> below for examples of how to use this type of command. +See also the section <> below for examples of how to use this type of command. 
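Both routes set the same placeholder. As a sketch, using a hypothetical property referenced in `solrconfig.xml` as `${my.custom.timeout:3000}`, the system-property route looks like the following at startup; the API route is shown in the examples below.

[source,bash]
----
# Pass the property as a JVM system property when starting Solr; if it
# is omitted, the default declared in the placeholder (3000) is used.
bin/solr start -Dmy.custom.timeout=5000
----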
-[[ConfigAPI-HowtoMapsolrconfig.xmlPropertiestoJSON]] == How to Map solrconfig.xml Properties to JSON By using this API, you will be generating JSON representations of properties defined in `solrconfig.xml`. To understand how properties should be represented with the API, let's take a look at a few examples. @@ -364,15 +354,12 @@ Define the same properties with the Config API: } ---- -[[ConfigAPI-NameComponentsfortheConfigAPI]] === Name Components for the Config API The Config API always allows changing the configuration of any component by name. However, some configurations such as `listener` or `initParams` do not require a name in `solrconfig.xml`. In order to be able to `update` and `delete` of the same item in `configoverlay.json`, the name attribute becomes mandatory. -[[ConfigAPI-Examples]] -== Examples +== Config API Examples -[[ConfigAPI-CreatingandUpdatingCommonProperties]] === Creating and Updating Common Properties This change sets the `query.filterCache.autowarmCount` to 1000 items and unsets the `query.filterCache.size`. @@ -403,7 +390,6 @@ And you should get a response like this: "size":25}}}}} ---- -[[ConfigAPI-CreatingandUpdatingRequestHandlers]] === Creating and Updating Request Handlers To create a request handler, we can use the `add-requesthandler` command: @@ -471,7 +457,6 @@ curl http://localhost:8983/solr/techproducts/config -H 'Content-type:application }' ---- -[[ConfigAPI-CreatingandUpdatingUser-DefinedProperties]] === Creating and Updating User-Defined Properties This command sets a user property. @@ -507,14 +492,12 @@ To unset the variable, issue a command like this: curl http://localhost:8983/solr/techproducts/config -H'Content-type:application/json' -d '{"unset-user-property" : "variable_name"}' ---- -[[ConfigAPI-HowItWorks]] -== How It Works +== How the Config API Works Every core watches the ZooKeeper directory for the configset being used with that core. In standalone mode, however, there is no watch (because ZooKeeper is not running). If there are multiple cores in the same node using the same configset, only one ZooKeeper watch is used. For instance, if the configset 'myconf' is used by a core, the node would watch `/configs/myconf`. Every write operation performed through the API would 'touch' the directory (sets an empty byte[] to trigger watches) and all watchers are notified. Every core would check if the Schema file, `solrconfig.xml` or `configoverlay.json` is modified by comparing the `znode` versions and if modified, the core is reloaded. If `params.json` is modified, the params object is just updated without a core reload (see the section <> for more information about `params.json`). -[[ConfigAPI-EmptyCommand]] === Empty Command If an empty command is sent to the `/config` endpoint, the watch is triggered on all cores using this configset. For example: @@ -528,7 +511,6 @@ Directly editing any files without 'touching' the directory *will not* make it v It is possible for components to watch for the configset 'touch' events by registering a listener using `SolrCore#registerConfListener()` . 
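As a sketch (assuming the `techproducts` example and a hypothetical variable named `variable_name`):

[source,bash]
----
# Store variable_name=some_value in configoverlay.json so that
# ${variable_name:default} references in solrconfig.xml resolve to it.
curl http://localhost:8983/solr/techproducts/config \
  -H 'Content-type:application/json' \
  -d '{"set-user-property": {"variable_name": "some_value"}}'
----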
-[[ConfigAPI-ListeningtoconfigChanges]] === Listening to config Changes Any component can register a listener using: diff --git a/solr/solr-ref-guide/src/configsets-api.adoc b/solr/solr-ref-guide/src/configsets-api.adoc index 603e08e097e..cae9b132961 100644 --- a/solr/solr-ref-guide/src/configsets-api.adoc +++ b/solr/solr-ref-guide/src/configsets-api.adoc @@ -1,6 +1,7 @@ = ConfigSets API :page-shortname: configsets-api :page-permalink: configsets-api.html +:page-toclevels: 1 // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -24,45 +25,40 @@ To use a ConfigSet created with this API as the configuration for a collection, This API can only be used with Solr running in SolrCloud mode. If you are not running Solr in SolrCloud mode but would still like to use shared configurations, please see the section <>. -[[ConfigSetsAPI-APIEntryPoints]] -== API Entry Points +== ConfigSets API Entry Points The base URL for all API calls is `\http://:/solr`. -* `/admin/configs?action=CREATE`: <> a ConfigSet, based on an existing ConfigSet -* `/admin/configs?action=DELETE`: <> a ConfigSet -* `/admin/configs?action=LIST`: <> all ConfigSets -* `/admin/configs?action=UPLOAD`: <> a ConfigSet +* `/admin/configs?action=CREATE`: <> a ConfigSet, based on an existing ConfigSet +* `/admin/configs?action=DELETE`: <> a ConfigSet +* `/admin/configs?action=LIST`: <> all ConfigSets +* `/admin/configs?action=UPLOAD`: <> a ConfigSet -[[ConfigSetsAPI-createCreateaConfigSet]] - -[[ConfigSetsAPI-create]] +[[configsets-create]] == Create a ConfigSet `/admin/configs?action=CREATE&name=_name_&baseConfigSet=_baseConfigSet_` Create a ConfigSet, based on an existing ConfigSet. -[[ConfigSetsAPI-Input]] -=== Input +=== Create ConfigSet Parameters The following parameters are supported when creating a ConfigSet. -name:: The ConfigSet to be created. This parameter is required. +name:: +The ConfigSet to be created. This parameter is required. -baseConfigSet:: The ConfigSet to copy as a base. This parameter is required. +baseConfigSet:: +The ConfigSet to copy as a base. This parameter is required. -configSetProp._name_=_value_:: Any ConfigSet property from base to override. +configSetProp._name_=_value_:: +Any ConfigSet property from base to override. -[[ConfigSetsAPI-Output]] -=== Output +=== Create ConfigSet Response -*Output Content* +The response will include the status of the request. If the status is anything other than "success", an error message will explain why the request failed. -The output will include the status of the request. If the status is anything other than "success", an error message will explain why the request failed. - -[[ConfigSetsAPI-Examples]] -=== Examples +=== Create ConfigSet Examples *Input* @@ -85,31 +81,23 @@ http://localhost:8983/solr/admin/configs?action=CREATE&name=myConfigSet&baseConf ---- -[[ConfigSetsAPI-deleteDeleteaConfigSet]] - -[[ConfigSetsAPI-delete]] +[[configsets-delete]] == Delete a ConfigSet `/admin/configs?action=DELETE&name=_name_` Delete a ConfigSet -[[ConfigSetsAPI-Input.1]] -=== Input +=== Delete ConfigSet Parameters -*Query Parameters* +name:: +The ConfigSet to be deleted. This parameter is required. -name:: The ConfigSet to be deleted. This parameter is required. - -[[ConfigSetsAPI-Output.1]] -=== Output - -*Output Content* +=== Delete ConfigSet Response The output will include the status of the request. 
If the status is anything other than "success", an error message will explain why the request failed. -[[ConfigSetsAPI-Examples.1]] -=== Examples +=== Delete ConfigSet Examples *Input* @@ -132,15 +120,14 @@ http://localhost:8983/solr/admin/configs?action=DELETE&name=myConfigSet ---- -[[ConfigSetsAPI-list]] +[[configsets-list]] == List ConfigSets `/admin/configs?action=LIST` Fetch the names of the ConfigSets in the cluster. -[[ConfigSetsAPI-Examples.2]] -=== Examples +=== List ConfigSet Examples *Input* @@ -161,7 +148,7 @@ http://localhost:8983/solr/admin/configs?action=LIST&wt=json "myConfig2"]} ---- -[[ConfigSetsAPI-upload]] +[[configsets-upload]] == Upload a ConfigSet `/admin/configs?action=UPLOAD&name=_name_` @@ -173,22 +160,18 @@ Upload a ConfigSet, sent in as a zipped file. Please note that a ConfigSet is up * XSLT transformer (tr parameter) cannot be used at request processing time. * StatelessScriptUpdateProcessor does not initialize, if specified in the ConfigSet. -[[ConfigSetsAPI-Input.3]] -=== Input +=== Upload ConfigSet Parameters -name:: The ConfigSet to be created when the upload is complete. This parameter is required. +name:: +The ConfigSet to be created when the upload is complete. This parameter is required. The body of the request should contain a zipped config set. -[[ConfigSetsAPI-Output.3]] -=== Output - -*Output Content* +=== Upload ConfigSet Response The output will include the status of the request. If the status is anything other than "success", an error message will explain why the request failed. -[[ConfigSetsAPI-Examples.3]] -=== Examples +=== Upload ConfigSet Examples Create a ConfigSet named 'myConfigSet' based on a 'predefinedTemplate' ConfigSet, overriding the immutable property to false. diff --git a/solr/solr-ref-guide/src/configuring-logging.adoc b/solr/solr-ref-guide/src/configuring-logging.adoc index 7e22f387f86..05a6c7465ef 100644 --- a/solr/solr-ref-guide/src/configuring-logging.adoc +++ b/solr/solr-ref-guide/src/configuring-logging.adoc @@ -25,7 +25,6 @@ Solr logs are a key way to know what's happening in the system. There are severa In addition to the logging options described below, there is a way to configure which request parameters (such as parameters sent as part of queries) are logged with an additional request parameter called `logParamsList`. See the section on <> for more information. ==== -[[ConfiguringLogging-TemporaryLoggingSettings]] == Temporary Logging Settings You can control the amount of logging output in Solr by using the Admin Web interface. Select the *LOGGING* link. Note that this page only lets you change settings in the running system and is not saved for the next run. (For more information about the Admin Web interface, see <>.) @@ -59,7 +58,6 @@ Log levels settings are as follows: Multiple settings at one time are allowed. -[[ConfiguringLogging-LoglevelAPI]] === Log level API There is also a way of sending REST commands to the logging endpoint to do the same. Example: @@ -70,7 +68,6 @@ There is also a way of sending REST commands to the logging endpoint to do the s curl -s http://localhost:8983/solr/admin/info/logging --data-binary "set=root:WARN&wt=json" ---- -[[ConfiguringLogging-ChoosingLogLevelatStartup]] == Choosing Log Level at Startup You can temporarily choose a different logging level as you start Solr. 
There are two ways: @@ -87,7 +84,6 @@ bin/solr start -f -v bin/solr start -f -q ---- -[[ConfiguringLogging-PermanentLoggingSettings]] == Permanent Logging Settings Solr uses http://logging.apache.org/log4j/1.2/[Log4J version 1.2] for logging which is configured using `server/resources/log4j.properties`. Take a moment to inspect the contents of the `log4j.properties` file so that you are familiar with its structure. By default, Solr log messages will be written to `SOLR_LOGS_DIR/solr.log`. @@ -109,7 +105,6 @@ On every startup of Solr, the start script will clean up old logs and rotate the You can disable the automatic log rotation at startup by changing the setting `SOLR_LOG_PRESTART_ROTATION` found in `bin/solr.in.sh` or `bin/solr.in.cmd` to false. -[[ConfiguringLogging-LoggingSlowQueries]] == Logging Slow Queries For high-volume search applications, logging every query can generate a large amount of logs and, depending on the volume, potentially impact performance. If you mine these logs for additional insights into your application, then logging every query request may be useful. diff --git a/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc b/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc index 48f9084452e..7888e29a8af 100644 --- a/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc +++ b/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc @@ -51,14 +51,12 @@ We've covered the options in the following sections: * <> * <> -[[Configuringsolrconfig.xml-SubstitutingPropertiesinSolrConfigFiles]] == Substituting Properties in Solr Config Files Solr supports variable substitution of property values in config files, which allows runtime specification of various configuration options in `solrconfig.xml`. The syntax is `${propertyname[:option default value]`}. This allows defining a default that can be overridden when Solr is launched. If a default value is not specified, then the property _must_ be specified at runtime or the configuration file will generate an error when parsed. There are multiple methods for specifying properties that can be used in configuration files. Of those below, strongly consider "config overlay" as the preferred approach, as it stays local to the config set and because it's easy to modify. -[[Configuringsolrconfig.xml-JVMSystemProperties]] === JVM System Properties Any JVM System properties, usually specified using the `-D` flag when starting the JVM, can be used as variables in any XML configuration file in Solr. @@ -79,8 +77,7 @@ bin/solr start -Dsolr.lock.type=none In general, any Java system property that you want to set can be passed through the `bin/solr` script using the standard `-Dproperty=value` syntax. Alternatively, you can add common system properties to the `SOLR_OPTS` environment variable defined in the Solr include file (`bin/solr.in.sh` or `bin/solr.in.cmd`). For more information about how the Solr include file works, refer to: <>. -[[Configuringsolrconfig.xml-ConfigAPI]] -=== Config API +=== Config API to Override solrconfig.xml The <> allows you to use an API to modify Solr's configuration, specifically user defined properties. Changes made with this API are stored in a file named `configoverlay.json`. This file should only be edited with the API, but will look like this example: @@ -94,7 +91,6 @@ The <> allows you to use an API to modify For more details, see the section <>. 
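As a quick sketch of how this looks in practice (the collection name `techproducts` and the property name `variable_name` are placeholders), a user-defined property can be set through the API and the resulting overlay read back:

[source,bash]
----
# Set a user-defined property; the change is written to configoverlay.json
curl http://localhost:8983/solr/techproducts/config \
  -H 'Content-type:application/json' \
  -d '{"set-user-property": {"variable_name": "some_value"}}'

# Read the overlay back to confirm the stored value
curl http://localhost:8983/solr/techproducts/config/overlay?omitHeader=true
----

Unsetting the property with `unset-user-property`, as shown earlier, removes it from the overlay again.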
-[[Configuringsolrconfig.xml-solrcore.properties]] === solrcore.properties If the configuration directory for a Solr core contains a file named `solrcore.properties` that file can contain any arbitrary user defined property names and values using the Java standard https://en.wikipedia.org/wiki/.properties[properties file format], and those properties can be used as variables in the XML configuration files for that Solr core. @@ -120,7 +116,6 @@ The path and name of the `solrcore.properties` file can be overridden using the ==== -[[Configuringsolrconfig.xml-Userdefinedpropertiesfromcore.properties]] === User-Defined Properties in core.properties Every Solr core has a `core.properties` file, automatically created when using the APIs. When you create a SolrCloud collection, you can pass through custom parameters to go into each core.properties that will be created, by prefixing the parameter name with "property." as a URL parameter. Example: @@ -148,7 +143,6 @@ The `my.custom.prop` property can then be used as a variable, such as in `solrco ---- -[[Configuringsolrconfig.xml-ImplicitCoreProperties]] === Implicit Core Properties Several attributes of a Solr core are available as "implicit" properties that can be used in variable substitution, independent of where or how they underlying value is initialized. For example: regardless of whether the name for a particular Solr core is explicitly configured in `core.properties` or inferred from the name of the instance directory, the implicit property `solr.core.name` is available for use as a variable in that core's configuration file... diff --git a/solr/solr-ref-guide/src/content-streams.adoc b/solr/solr-ref-guide/src/content-streams.adoc index f5f01a84f27..c4fc19427a1 100644 --- a/solr/solr-ref-guide/src/content-streams.adoc +++ b/solr/solr-ref-guide/src/content-streams.adoc @@ -22,8 +22,7 @@ Content streams are bulk data passed with a request to Solr. When Solr RequestHandlers are accessed using path based URLs, the `SolrQueryRequest` object containing the parameters of the request may also contain a list of ContentStreams containing bulk data for the request. (The name SolrQueryRequest is a bit misleading: it is involved in all requests, regardless of whether it is a query request or an update request.) -[[ContentStreams-StreamSources]] -== Stream Sources +== Content Stream Sources Currently request handlers can get content streams in a variety of ways: @@ -34,7 +33,6 @@ Currently request handlers can get content streams in a variety of ways: By default, curl sends a `contentType="application/x-www-form-urlencoded"` header. If you need to test a SolrContentHeader content stream, you will need to set the content type with curl's `-H` flag. -[[ContentStreams-RemoteStreaming]] == RemoteStreaming Remote streaming lets you send the contents of a URL as a stream to a given SolrRequestHandler. You could use remote streaming to send a remote or local file to an update plugin. @@ -65,10 +63,9 @@ curl -d ' [IMPORTANT] ==== -If `enableRemoteStreaming="true"` is used, be aware that this allows _anyone_ to send a request to any URL or local file. If <> is enabled, it will allow anyone to view any file on your system. +If `enableRemoteStreaming="true"` is used, be aware that this allows _anyone_ to send a request to any URL or local file. If the <> is enabled, it will allow anyone to view any file on your system. 
==== -[[ContentStreams-DebuggingRequests]] == Debugging Requests The implicit "dump" RequestHandler (see <>) simply outputs the contents of the SolrQueryRequest using the specified writer type `wt`. This is a useful tool to help understand what streams are available to the RequestHandlers. diff --git a/solr/solr-ref-guide/src/coreadmin-api.adoc b/solr/solr-ref-guide/src/coreadmin-api.adoc index a4a8ea9166e..67587015b0b 100644 --- a/solr/solr-ref-guide/src/coreadmin-api.adoc +++ b/solr/solr-ref-guide/src/coreadmin-api.adoc @@ -29,7 +29,7 @@ CoreAdmin actions can be executed by via HTTP requests that specify an `action` All action names are uppercase, and are defined in depth in the sections below. -[[CoreAdminAPI-STATUS]] +[[coreadmin-status]] == STATUS The `STATUS` action returns the status of all running Solr cores, or status for only the named core. @@ -44,7 +44,7 @@ The name of a core, as listed in the "name" attribute of a `` element in ` `indexInfo`:: If `false`, information about the index will not be returned with a core STATUS request. In Solr implementations with a large number of cores (i.e., more than hundreds), retrieving the index information for each core can take a lot of time and isn't always required. The default is `true`. -[[CoreAdminAPI-CREATE]] +[[coreadmin-create]] == CREATE The `CREATE` action creates a new core and registers it. @@ -102,7 +102,7 @@ WARNING: While it's possible to create a core for a non-existent collection, thi The shard id this core represents. Normally you want to be auto-assigned a shard id. `property._name_=_value_`:: -Sets the core property _name_ to _value_. See the section on defining <>. +Sets the core property _name_ to _value_. See the section on defining <>. `async`:: Request ID to track this action which will be processed asynchronously. @@ -115,7 +115,7 @@ Use `collection.configName=_configname_` to point to the config for a new collec http://localhost:8983/solr/admin/cores?action=CREATE&name=my_core&collection=my_collection&shard=shard2 -[[CoreAdminAPI-RELOAD]] +[[coreadmin-reload]] == RELOAD The RELOAD action loads a new core from the configuration of an existing, registered Solr core. While the new core is initializing, the existing one will continue to handle requests. When the new Solr core is ready, it takes over and the old core is unloaded. @@ -134,7 +134,7 @@ RELOAD performs "live" reloads of SolrCore, reusing some existing objects. Some `core`:: The name of the core, as listed in the "name" attribute of a `` element in `solr.xml`. This parameter is required. -[[CoreAdminAPI-RENAME]] +[[coreadmin-rename]] == RENAME The `RENAME` action changes the name of a Solr core. @@ -153,7 +153,7 @@ The new name for the Solr core. If the persistent attribute of `` is `true Request ID to track this action which will be processed asynchronously. -[[CoreAdminAPI-SWAP]] +[[coreadmin-swap]] == SWAP `SWAP` atomically swaps the names used to access two existing Solr cores. This can be used to swap new content into production. The prior core remains available and can be swapped back, if necessary. Each core will be known by the name of the other, after the swap. @@ -162,9 +162,7 @@ Request ID to track this action which will be processed asynchronously. [IMPORTANT] ==== - Do not use `SWAP` with a SolrCloud node. It is not supported and can result in the core being unusable. - ==== === SWAP Parameters @@ -179,7 +177,7 @@ The name of one of the cores to be swapped. This parameter is required. 
Request ID to track this action which will be processed asynchronously. -[[CoreAdminAPI-UNLOAD]] +[[coreadmin-unload]] == UNLOAD The `UNLOAD` action removes a core from Solr. Active requests will continue to be processed, but no new requests will be sent to the named core. If a core is registered under more than one name, only the given name is removed. @@ -210,8 +208,7 @@ If `true`, removes everything related to the core, including the index directory `async`:: Request ID to track this action which will be processed asynchronously. - -[[CoreAdminAPI-MERGEINDEXES]] +[[coreadmin-mergeindexes]] == MERGEINDEXES The `MERGEINDEXES` action merges one or more indexes to another index. The indexes must have completed commits, and should be locked against writes until the merge is complete or the resulting merged index may become corrupted. The target core index must already exist and have a compatible schema with the one or more indexes that will be merged to it. Another commit on the target core should also be performed after the merge is complete. @@ -243,7 +240,7 @@ Multi-valued, source cores that would be merged. Request ID to track this action which will be processed asynchronously -[[CoreAdminAPI-SPLIT]] +[[coreadmin-split]] == SPLIT The `SPLIT` action splits an index into two or more indexes. The index being split can continue to handle requests. The split pieces can be placed into a specified directory on the server's filesystem or it can be merged into running Solr cores. @@ -270,7 +267,6 @@ The key to be used for splitting the index. If this parameter is used, `ranges` `async`:: Request ID to track this action which will be processed asynchronously. - === SPLIT Examples The `core` index will be split into as many pieces as the number of `path` or `targetCore` parameters. @@ -305,9 +301,9 @@ This example uses the `ranges` parameter with hash ranges 0-500, 501-1000 and 10 The `targetCore` must already exist and must have a compatible schema with the `core` index. A commit is automatically called on the `core` index before it is split. -This command is used as part of the <> command but it can be used for non-cloud Solr cores as well. When used against a non-cloud core without `split.key` parameter, this action will split the source index and distribute its documents alternately so that each split piece contains an equal number of documents. If the `split.key` parameter is specified then only documents having the same route key will be split from the source index. +This command is used as part of the <> command but it can be used for non-cloud Solr cores as well. When used against a non-cloud core without `split.key` parameter, this action will split the source index and distribute its documents alternately so that each split piece contains an equal number of documents. If the `split.key` parameter is specified then only documents having the same route key will be split from the source index. -[[CoreAdminAPI-REQUESTSTATUS]] +[[coreadmin-requeststatus]] == REQUESTSTATUS Request the status of an already submitted asynchronous CoreAdmin API call. @@ -326,7 +322,7 @@ The call below will return the status of an already submitted asynchronous CoreA [source,bash] http://localhost:8983/solr/admin/cores?action=REQUESTSTATUS&requestid=1 -[[CoreAdminAPI-REQUESTRECOVERY]] +[[coreadmin-requestrecovery]] == REQUESTRECOVERY The `REQUESTRECOVERY` action manually asks a core to recover by synching with the leader. 
This should be considered an "expert" level command and should be used in situations where the node (SorlCloud replica) is unable to become active automatically. @@ -338,7 +334,6 @@ The `REQUESTRECOVERY` action manually asks a core to recover by synching with th `core`:: The name of the core to re-sync. This parameter is required. -[[CoreAdminAPI-Examples.1]] === REQUESTRECOVERY Examples [source,bash] diff --git a/solr/solr-ref-guide/src/cross-data-center-replication-cdcr.adoc b/solr/solr-ref-guide/src/cross-data-center-replication-cdcr.adoc index bffa71ff873..50d4396230d 100644 --- a/solr/solr-ref-guide/src/cross-data-center-replication-cdcr.adoc +++ b/solr/solr-ref-guide/src/cross-data-center-replication-cdcr.adoc @@ -140,8 +140,6 @@ The CDCR replication logic requires modification to the maintenance logic of the If the communication with one of the target data center is slow, the Updates Log on the source data center can grow to a substantial size. In such a scenario, it is necessary for the Updates Log to be able to efficiently find a given update operation given its identifier. Given that its identifier is an incremental number, it is possible to implement an efficient search strategy. Each transaction log file contains as part of its filename the version number of the first element. This is used to quickly traverse all the transaction log files and find the transaction log file containing one specific version number. - -[[CrossDataCenterReplication_CDCR_-Monitoring]] === Monitoring CDCR provides the following monitoring capabilities over the replication operations: @@ -155,24 +153,19 @@ Information about the lifecycle and statistics will be provided on a per-shard b The CDC Replicator is a background thread that is responsible for replicating updates from a Source data center to one or more target data centers. It is responsible in providing monitoring information on a per-shard basis. As there can be a large number of collections and shards in a cluster, we will use a fixed-size pool of CDC Replicator threads that will be shared across shards. - -[[CrossDataCenterReplication_CDCR_-Limitations]] -=== Limitations +=== CDCR Limitations The current design of CDCR has some limitations. CDCR will continue to evolve over time and many of these limitations will be addressed. Among them are: * CDCR is unlikely to be satisfactory for bulk-load situations where the update rate is high, especially if the bandwidth between the Source and target clusters is restricted. In this scenario, the initial bulk load should be performed, the Source and target data centers synchronized and CDCR be utilized for incremental updates. * CDCR is currently only active-passive; data is pushed from the Source cluster to the target cluster. There is active work being done in this area in the 6x code line to remove this limitation. * CDCR works most robustly with the same number of shards in the Source and target collection. The shards in the two collections may have different numbers of replicas. +* Running CDCR with the indexes on HDFS is not currently supported, see the https://issues.apache.org/jira/browse/SOLR-9861[Solr CDCR over HDFS] JIRA issue. - -[[CrossDataCenterReplication_CDCR_-Configuration]] -== Configuration +== CDCR Configuration The source and target configurations differ in the case of the data centers being in separate clusters. "Cluster" here means separate ZooKeeper ensembles controlling disjoint Solr instances. 
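Because control actions are issued against a collection while monitoring actions are issued against an individual core, the two call shapes differ slightly. A minimal sketch, assuming a local node, a placeholder collection name, and an assumed core name:

[source,bash]
----
COLLECTION=mycollection   # placeholder collection name

# Control actions go to the collection-level endpoint
curl "http://localhost:8983/solr/${COLLECTION}/cdcr?action=STATUS"

# Monitoring actions go to an individual core of that collection;
# the core name below is only an assumed example
curl "http://localhost:8983/solr/${COLLECTION}_shard1_replica1/cdcr?action=QUEUES"
----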
Whether these data centers are physically separated or not is immaterial for this discussion. - -[[CrossDataCenterReplication_CDCR_-SourceConfiguration]] === Source Configuration Here is a sample of a source configuration file, a section in `solrconfig.xml`. The presence of the section causes CDCR to use this cluster as the Source and should not be present in the target collections in the cluster-to-cluster case. Details about each setting are after the two examples: @@ -211,8 +204,6 @@ Here is a sample of a source configuration file, a section in `solrconfig.xml`. ---- - -[[CrossDataCenterReplication_CDCR_-TargetConfiguration]] === Target Configuration Here is a typical target configuration. @@ -256,7 +247,6 @@ The configuration details, defaults and options are as follows: CDCR can be configured to forward update requests to one or more replicas. A replica is defined with a “replica” list as follows: - `zkHost`:: The host address for ZooKeeper of the target SolrCloud. Usually this is a comma-separated list of addresses to each node in the target ZooKeeper ensemble. This parameter is required. @@ -303,41 +293,27 @@ Monitor actions are performed at a core level, i.e., by using the following base Currently, none of the CDCR API calls have parameters. - === API Entry Points (Control) -* `/cdcr?action=STATUS`: <> of CDCR. -* `/cdcr?action=START`: <> replication -* `/cdcr?action=STOP`: <> replication. -* `/cdcr?action=ENABLEBUFFER`: <> of updates. -* `/cdcr?action=DISABLEBUFFER`: <> of updates. - +* `/cdcr?action=STATUS`: <> of CDCR. +* `/cdcr?action=START`: <> replication +* `/cdcr?action=STOP`: <> replication. +* `/cdcr?action=ENABLEBUFFER`: <> of updates. +* `/cdcr?action=DISABLEBUFFER`: <> of updates. === API Entry Points (Monitoring) -* `core/cdcr?action=QUEUES`: <> for each replica and about the update logs. -* `core/cdcr?action=OPS`: <> (operations per second) for each replica. -* `core/cdcr?action=ERRORS`: <> for each replica. +* `core/cdcr?action=QUEUES`: <> for each replica and about the update logs. +* `core/cdcr?action=OPS`: <> (operations per second) for each replica. +* `core/cdcr?action=ERRORS`: <> for each replica. === Control Commands -[[CrossDataCenterReplication_CDCR_-STATUS]] -==== STATUS +==== CDCR STATUS `/collection/cdcr?action=STATUS` -===== Input - -*Query Parameters:* There are no parameters to this command. - -===== Output - -*Output Content* - -The current state of the CDCR, which includes the state of the replication process and the state of the buffer. - -[[cdcr_examples]] -===== Examples +===== CDCR Status Example *Input* @@ -362,22 +338,15 @@ The current state of the CDCR, which includes the state of the replication proce } ---- -[[CrossDataCenterReplication_CDCR_-ENABLEBUFFER]] ==== ENABLEBUFFER `/collection/cdcr?action=ENABLEBUFFER` -===== Input +===== Enable Buffer Response -*Query Parameters:* There are no parameters to this command. +The status of the process and an indication of whether the buffer is enabled. -===== Output - -*Output Content* - -The status of the process and an indication of whether the buffer is enabled - -===== Examples +===== Enable Buffer Example *Input* @@ -402,20 +371,15 @@ The status of the process and an indication of whether the buffer is enabled } ---- -[[CrossDataCenterReplication_CDCR_-DISABLEBUFFER]] ==== DISABLEBUFFER `/collection/cdcr?action=DISABLEBUFFER` -===== Input +===== Disable Buffer Response -*Query Parameters:* There are no parameters to this command +The status of CDCR and an indication that the buffer is disabled. 
-===== Output - -*Output Content:* The status of CDCR and an indication that the buffer is disabled. - -===== Examples +===== Disable Buffer Example *Input* @@ -440,20 +404,15 @@ http://host:8983/solr//cdcr?action=DISABLEBUFFER } ---- -[[CrossDataCenterReplication_CDCR_-START]] -==== START +==== CDCR START `/collection/cdcr?action=START` -===== Input +===== CDCR Start Response -*Query Parameters:* There are no parameters for this action +Confirmation that CDCR is started and the status of buffering -===== Output - -*Output Content:* Confirmation that CDCR is started and the status of buffering - -===== Examples +===== CDCR Start Examples *Input* @@ -478,20 +437,15 @@ http://host:8983/solr//cdcr?action=START } ---- -[[CrossDataCenterReplication_CDCR_-STOP]] -==== STOP +==== CDCR STOP `/collection/cdcr?action=STOP` -===== Input +===== CDCR Stop Response -*Query Parameters:* There are no parameters for this command. +The status of CDCR, including the confirmation that CDCR is stopped. -===== Output - -*Output Content:* The status of CDCR, including the confirmation that CDCR is stopped - -===== Examples +===== CDCR Stop Examples *Input* @@ -517,19 +471,13 @@ http://host:8983/solr//cdcr?action=START ---- -[[CrossDataCenterReplication_CDCR_-Monitoringcommands]] -=== Monitoring commands +=== CDCR Monitoring Commands -[[CrossDataCenterReplication_CDCR_-QUEUES]] ==== QUEUES `/core/cdcr?action=QUEUES` -===== Input - -*Query Parameters:* There are no parameters for this command - -===== Output +===== QUEUES Response *Output Content* @@ -537,7 +485,7 @@ The output is composed of a list “queues” which contains a list of (ZooKeepe The “queues” object also contains information about the updates log, such as the size (in bytes) of the updates log on disk (“tlogTotalSize”), the number of transaction log files (“tlogTotalCount”) and the status of the updates log synchronizer (“updateLogSynchronizer”). -===== Examples +===== QUEUES Examples *Input* @@ -569,20 +517,15 @@ The “queues” object also contains information about the updates log, such as } ---- -[[CrossDataCenterReplication_CDCR_-OPS]] ==== OPS `/core/cdcr?action=OPS` -===== Input +===== OPS Response -*Query Parameters:* There are no parameters for this command. +The output is composed of `operationsPerSecond` which contains a list of (ZooKeeper) target hosts, themselves containing a list of target collections. For each collection, the average number of processed operations per second since the start of the replication process is provided. The operations are further broken down into two groups: add and delete operations. -===== Output - -*Output Content:* The output is composed of a list “operationsPerSecond” which contains a list of (ZooKeeper) target hosts, themselves containing a list of target collections. For each collection, the average number of processed operations per second since the start of the replication process is provided. The operations are further broken down into two groups: add and delete operations. - -===== Examples +===== OPS Examples *Input* @@ -612,20 +555,15 @@ The “queues” object also contains information about the updates log, such as } ---- -[[CrossDataCenterReplication_CDCR_-ERRORS]] ==== ERRORS `/core/cdcr?action=ERRORS` -===== Input +===== ERRORS Response -*Query Parameters:* There are no parameters for this command. +The output is composed of a list “errors” which contains a list of (ZooKeeper) target hosts, themselves containing a list of target collections. 
For each collection, information about errors encountered during the replication is provided, such as the number of consecutive errors encountered by the replicator thread, the number of bad requests or internal errors since the start of the replication process, and a list of the last errors encountered ordered by timestamp. -===== Output - -*Output Content:* The output is composed of a list “errors” which contains a list of (ZooKeeper) target hosts, themselves containing a list of target collections. For each collection, information about errors encountered during the replication is provided, such as the number of consecutive errors encountered by the replicator thread, the number of bad requests or internal errors since the start of the replication process, and a list of the last errors encountered ordered by timestamp. - -===== Examples +===== ERRORS Examples *Input* @@ -728,7 +666,6 @@ http://host:port/solr/collection_name/cdcr?action=DISABLEBUFFER + * Renable indexing -[[CrossDataCenterReplication_CDCR_-Monitoring.1]] == Monitoring . Network and disk space monitoring are essential. Ensure that the system has plenty of available storage to queue up changes if there is a disconnect between the Source and Target. A network outage between the two data centers can cause your disk usage to grow. @@ -763,8 +700,3 @@ curl http:///solr/cloud1/update -H 'Content-type:application/json' -d '[ #check the Target curl "http://:8983/solr//select?q=SKU:ABC&wt=json&indent=true" ---- - -[[CrossDataCenterReplication_CDCR_-Limitations.1]] -== Limitations - -* Running CDCR with the indexes on HDFS is not currently supported, see: https://issues.apache.org/jira/browse/SOLR-9861[Solr CDCR over HDFS]. diff --git a/solr/solr-ref-guide/src/datadir-and-directoryfactory-in-solrconfig.adoc b/solr/solr-ref-guide/src/datadir-and-directoryfactory-in-solrconfig.adoc index c68a3adf748..f3e8dc969ff 100644 --- a/solr/solr-ref-guide/src/datadir-and-directoryfactory-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/datadir-and-directoryfactory-in-solrconfig.adoc @@ -35,7 +35,6 @@ If you are using replication to replicate the Solr index (as described in <//data`. -[[DataDirandDirectoryFactoryinSolrConfig-SpecifyingtheDirectoryFactoryForYourIndex]] == Specifying the DirectoryFactory For Your Index The default {solr-javadocs}/solr-core/org/apache/solr/core/StandardDirectoryFactory.html[`solr.StandardDirectoryFactory`] is filesystem based, and tries to pick the best implementation for the current JVM and platform. You can force a particular implementation and/or config options by specifying {solr-javadocs}/solr-core/org/apache/solr/core/MMapDirectoryFactory.html[`solr.MMapDirectoryFactory`], {solr-javadocs}/solr-core/org/apache/solr/core/NIOFSDirectoryFactory.html[`solr.NIOFSDirectoryFactory`], or {solr-javadocs}/solr-core/org/apache/solr/core/SimpleFSDirectoryFactory.html[`solr.SimpleFSDirectoryFactory`]. @@ -57,7 +56,5 @@ The {solr-javadocs}/solr-core/org/apache/solr/core/RAMDirectoryFactory.html[`sol [NOTE] ==== - If you are using Hadoop and would like to store your indexes in HDFS, you should use the {solr-javadocs}/solr-core/org/apache/solr/core/HdfsDirectoryFactory.html[`solr.HdfsDirectoryFactory`] instead of either of the above implementations. For more details, see the section <>. 
- ==== diff --git a/solr/solr-ref-guide/src/dataimport-screen.adoc b/solr/solr-ref-guide/src/dataimport-screen.adoc index 363a2bd2a75..9f3cb43af80 100644 --- a/solr/solr-ref-guide/src/dataimport-screen.adoc +++ b/solr/solr-ref-guide/src/dataimport-screen.adoc @@ -23,7 +23,6 @@ The Dataimport screen shows the configuration of the DataImportHandler (DIH) and .The Dataimport Screen image::images/dataimport-screen/dataimport.png[image,width=485,height=250] - This screen also lets you adjust various options to control how the data is imported to Solr, and view the data import configuration file that controls the import. For more information about data importing with DIH, see the section on <>. diff --git a/solr/solr-ref-guide/src/de-duplication.adoc b/solr/solr-ref-guide/src/de-duplication.adoc index 3e9cd46a141..67f8d8cb192 100644 --- a/solr/solr-ref-guide/src/de-duplication.adoc +++ b/solr/solr-ref-guide/src/de-duplication.adoc @@ -26,7 +26,6 @@ Preventing duplicate or near duplicate documents from entering an index or taggi * Lookup3Signature: 64-bit hash used for exact duplicate detection. This is much faster than MD5 and smaller to index. * http://wiki.apache.org/solr/TextProfileSignature[TextProfileSignature]: Fuzzy hashing implementation from Apache Nutch for near duplicate detection. It's tunable but works best on longer text. - Other, more sophisticated algorithms for fuzzy/near hashing can be added later. [IMPORTANT] @@ -36,12 +35,10 @@ Adding in the de-duplication process will change the `allowDups` setting so that Of course the `signatureField` could be the unique field, but generally you want the unique field to be unique. When a document is added, a signature will automatically be generated and attached to the document in the specified `signatureField`. ==== -[[De-Duplication-ConfigurationOptions]] == Configuration Options There are two places in Solr to configure de-duplication: in `solrconfig.xml` and in `schema.xml`. -[[De-Duplication-Insolrconfig.xml]] === In solrconfig.xml The `SignatureUpdateProcessorFactory` has to be registered in `solrconfig.xml` as part of an <>, as in this example: @@ -84,8 +81,6 @@ Set to *false* to disable de-duplication processing. The default is *true*. overwriteDupes:: If true, the default, when a document exists that already matches this signature, it will be overwritten. - -[[De-Duplication-Inschema.xml]] === In schema.xml If you are using a separate field for storing the signature, you must have it indexed: diff --git a/solr/solr-ref-guide/src/defining-core-properties.adoc b/solr/solr-ref-guide/src/defining-core-properties.adoc index a533098609e..142432706ef 100644 --- a/solr/solr-ref-guide/src/defining-core-properties.adoc +++ b/solr/solr-ref-guide/src/defining-core-properties.adoc @@ -29,7 +29,6 @@ A minimal `core.properties` file looks like the example below. However, it can a name=my_core_name ---- -[[Definingcore.properties-Placementofcore.properties]] == Placement of core.properties Solr cores are configured by placing a file named `core.properties` in a sub-directory under `solr.home`. There are no a-priori limits to the depth of the tree, nor are there limits to the number of cores that can be defined. Cores may be anywhere in the tree with the exception that cores may _not_ be defined under an existing core. That is, the following is not allowed: @@ -61,11 +60,8 @@ Your `core.properties` file can be empty if necessary. Suppose `core.properties` You can run Solr without configuring any cores. 
==== -[[Definingcore.properties-Definingcore.propertiesFiles]] == Defining core.properties Files -[[Definingcore.properties-core.properties_files]] - The minimal `core.properties` file is an empty file, in which case all of the properties are defaulted appropriately. Java properties files allow the hash (`#`) or bang (`!`) characters to specify comment-to-end-of-line. @@ -98,4 +94,4 @@ The following properties are available: `roles`:: Future parameter for SolrCloud or a way for users to mark nodes for their own use. -Additional user-defined properties may be specified for use as variables. For more information on how to define local properties, see the section <>. +Additional user-defined properties may be specified for use as variables. For more information on how to define local properties, see the section <>. diff --git a/solr/solr-ref-guide/src/defining-fields.adoc b/solr/solr-ref-guide/src/defining-fields.adoc index 8e6de9c4269..ef93d605d6d 100644 --- a/solr/solr-ref-guide/src/defining-fields.adoc +++ b/solr/solr-ref-guide/src/defining-fields.adoc @@ -20,8 +20,7 @@ Fields are defined in the fields element of `schema.xml`. Once you have the field types set up, defining the fields themselves is simple. -[[DefiningFields-Example]] -== Example +== Example Field Definition The following example defines a field named `price` with a type named `float` and a default value of `0.0`; the `indexed` and `stored` properties are explicitly set to `true`, while any other properties specified on the `float` field type are inherited. @@ -30,7 +29,6 @@ The following example defines a field named `price` with a type named `float` an ---- -[[DefiningFields-FieldProperties]] == Field Properties Field definitions can have the following properties: @@ -44,7 +42,6 @@ The name of the `fieldType` for this field. This will be found in the `name` att `default`:: A default value that will be added automatically to any document that does not have a value in this field when it is indexed. If this property is not specified, there is no default. -[[DefiningFields-OptionalFieldTypeOverrideProperties]] == Optional Field Type Override Properties Fields can have many of the same properties as field types. Properties from the table below which are specified on an individual field will override any explicit value for that property specified on the the `fieldType` of the field, or any implicit default property value provided by the underlying `fieldType` implementation. The table below is reproduced from <>, which has more details: diff --git a/solr/solr-ref-guide/src/detecting-languages-during-indexing.adoc b/solr/solr-ref-guide/src/detecting-languages-during-indexing.adoc index 4003f1ac914..392a0dfe092 100644 --- a/solr/solr-ref-guide/src/detecting-languages-during-indexing.adoc +++ b/solr/solr-ref-guide/src/detecting-languages-during-indexing.adoc @@ -31,12 +31,10 @@ For specific information on each of these language identification implementation For more information about language analysis in Solr, see <>. -[[DetectingLanguagesDuringIndexing-ConfiguringLanguageDetection]] == Configuring Language Detection You can configure the `langid` UpdateRequestProcessor in `solrconfig.xml`. Both implementations take the same parameters, which are described in the following section. At a minimum, you must specify the fields for language identification and a field for the resulting language code. 
-[[DetectingLanguagesDuringIndexing-ConfiguringTikaLanguageDetection]] === Configuring Tika Language Detection Here is an example of a minimal Tika `langid` configuration in `solrconfig.xml`: @@ -51,7 +49,6 @@ Here is an example of a minimal Tika `langid` configuration in `solrconfig.xml`: ---- -[[DetectingLanguagesDuringIndexing-ConfiguringLangDetectLanguageDetection]] === Configuring LangDetect Language Detection Here is an example of a minimal LangDetect `langid` configuration in `solrconfig.xml`: @@ -66,7 +63,6 @@ Here is an example of a minimal LangDetect `langid` configuration in `solrconfig ---- -[[DetectingLanguagesDuringIndexing-langidParameters]] == langid Parameters As previously mentioned, both implementations of the `langid` UpdateRequestProcessor take the same parameters. diff --git a/solr/solr-ref-guide/src/distributed-requests.adoc b/solr/solr-ref-guide/src/distributed-requests.adoc index 6d2c58503ca..b9c392039f5 100644 --- a/solr/solr-ref-guide/src/distributed-requests.adoc +++ b/solr/solr-ref-guide/src/distributed-requests.adoc @@ -22,10 +22,9 @@ When a Solr node receives a search request, the request is routed behind the sce The chosen replica acts as an aggregator: it creates internal requests to randomly chosen replicas of every shard in the collection, coordinates the responses, issues any subsequent internal requests as needed (for example, to refine facets values, or request additional stored fields), and constructs the final response for the client. -[[DistributedRequests-LimitingWhichShardsareQueried]] == Limiting Which Shards are Queried -While one of the advantages of using SolrCloud is the ability to query very large collections distributed among various shards, in some cases <>. You have the option of searching over all of your data or just parts of it. +While one of the advantages of using SolrCloud is the ability to query very large collections distributed among various shards, in some cases <>. You have the option of searching over all of your data or just parts of it. Querying all shards for a collection should look familiar; it's as though SolrCloud didn't even come into play: @@ -71,7 +70,6 @@ And of course, you can specify a list of shards (seperated by commas) each defin http://localhost:8983/solr/gettingstarted/select?q=*:*&shards=shard1,localhost:7574/solr/gettingstarted|localhost:7500/solr/gettingstarted ---- -[[DistributedRequests-ConfiguringtheShardHandlerFactory]] == Configuring the ShardHandlerFactory You can directly configure aspects of the concurrency and thread-pooling used within distributed search in Solr. This allows for finer grained control and you can tune it to target your own specific requirements. The default configuration favors throughput over latency. @@ -118,7 +116,6 @@ If specified, the thread pool will use a backing queue instead of a direct hando `fairnessPolicy`:: Chooses the JVM specifics dealing with fair policy queuing, if enabled distributed searches will be handled in a First in First out fashion at a cost to throughput. If disabled throughput will be favored over latency. The default is `false`. -[[DistributedRequests-ConfiguringstatsCache_DistributedIDF_]] == Configuring statsCache (Distributed IDF) Document and term statistics are needed in order to calculate relevancy. 
Solr provides four implementations out of the box when it comes to document stats calculation: @@ -135,15 +132,13 @@ The implementation can be selected by setting `` in `solrconfig.xml` ---- -[[DistributedRequests-AvoidingDistributedDeadlock]] == Avoiding Distributed Deadlock Each shard serves top-level query requests and then makes sub-requests to all of the other shards. Care should be taken to ensure that the max number of threads serving HTTP requests is greater than the possible number of requests from both top-level clients and other shards. If this is not the case, the configuration may result in a distributed deadlock. For example, a deadlock might occur in the case of two shards, each with just a single thread to service HTTP requests. Both threads could receive a top-level request concurrently, and make sub-requests to each other. Because there are no more remaining threads to service requests, the incoming requests will be blocked until the other pending requests are finished, but they will not finish since they are waiting for the sub-requests. By ensuring that Solr is configured to handle a sufficient number of threads, you can avoid deadlock situations like this. -[[DistributedRequests-PreferLocalShards]] -== Prefer Local Shards +== preferLocalShards Parameter Solr allows you to pass an optional boolean parameter named `preferLocalShards` to indicate that a distributed query should prefer local replicas of a shard when available. In other words, if a query includes `preferLocalShards=true`, then the query controller will look for local replicas to service the query instead of selecting replicas at random from across the cluster. This is useful when a query requests many fields or large fields to be returned per document because it avoids moving large amounts of data over the network when it is available locally. In addition, this feature can be useful for minimizing the impact of a problematic replica with degraded performance, as it reduces the likelihood that the degraded replica will be hit by other healthy replicas. diff --git a/solr/solr-ref-guide/src/distributed-search-with-index-sharding.adoc b/solr/solr-ref-guide/src/distributed-search-with-index-sharding.adoc index b1ad8dc8b27..0e6e7d8917c 100644 --- a/solr/solr-ref-guide/src/distributed-search-with-index-sharding.adoc +++ b/solr/solr-ref-guide/src/distributed-search-with-index-sharding.adoc @@ -26,14 +26,12 @@ Everything on this page is specific to legacy setup of distributed search. Users Update reorders (i.e., replica A may see update X then Y, and replica B may see update Y then X). *deleteByQuery* also handles reorders the same way, to ensure replicas are consistent. All replicas of a shard are consistent, even if the updates arrive in a different order on different replicas. -[[DistributedSearchwithIndexSharding-DistributingDocumentsacrossShards]] == Distributing Documents across Shards When not using SolrCloud, it is up to you to get all your documents indexed on each shard of your server farm. Solr supports distributed indexing (routing) in its true form only in the SolrCloud mode. In the legacy distributed mode, Solr does not calculate universal term/doc frequencies. For most large-scale implementations, it is not likely to matter that Solr calculates TF/IDF at the shard level. However, if your collection is heavily skewed in its distribution across servers, you may find misleading relevancy results in your searches. In general, it is probably best to randomly distribute documents to your shards. 
-[[DistributedSearchwithIndexSharding-ExecutingDistributedSearcheswiththeshardsParameter]] == Executing Distributed Searches with the shards Parameter If a query request includes the `shards` parameter, the Solr server distributes the request across all the shards listed as arguments to the parameter. The `shards` parameter uses this syntax: @@ -63,7 +61,6 @@ The following components support distributed search: * The *Stats* component, which returns simple statistics for numeric fields within the DocSet. * The *Debug* component, which helps with debugging. -[[DistributedSearchwithIndexSharding-LimitationstoDistributedSearch]] == Limitations to Distributed Search Distributed searching in Solr has the following limitations: @@ -78,12 +75,10 @@ Distributed searching in Solr has the following limitations: Formerly a limitation was that TF/IDF relevancy computations only used shard-local statistics. This is still the case by default. If your data isn't randomly distributed, or if you want more exact statistics, then remember to configure the ExactStatsCache. -[[DistributedSearchwithIndexSharding-AvoidingDistributedDeadlock]] -== Avoiding Distributed Deadlock +== Avoiding Distributed Deadlock with Distributed Search Like in SolrCloud mode, inter-shard requests could lead to a distributed deadlock. It can be avoided by following the instructions in the section <>. -[[DistributedSearchwithIndexSharding-TestingIndexShardingonTwoLocalServers]] == Testing Index Sharding on Two Local Servers For simple functional testing, it's easiest to just set up two local Solr servers on different ports. (In a production environment, of course, these servers would be deployed on separate machines.) diff --git a/solr/solr-ref-guide/src/documents-screen.adoc b/solr/solr-ref-guide/src/documents-screen.adoc index 4605dd79e64..7c16ee98663 100644 --- a/solr/solr-ref-guide/src/documents-screen.adoc +++ b/solr/solr-ref-guide/src/documents-screen.adoc @@ -42,28 +42,24 @@ The first step is to define the RequestHandler to use (aka, 'qt'). By default `/ Then choose the Document Type to define the type of document to load. The remaining parameters will change depending on the document type selected. -[[DocumentsScreen-JSON]] -== JSON +== JSON Documents When using the JSON document type, the functionality is similar to using a requestHandler on the command line. Instead of putting the documents in a curl command, they can instead be input into the Document entry box. The document structure should still be in proper JSON format. Then you can choose when documents should be added to the index (Commit Within), & whether existing documents should be overwritten with incoming documents with the same id (if this is not *true*, then the incoming documents will be dropped). -This option will only add or overwrite documents to the index; for other update tasks, see the <> option. +This option will only add or overwrite documents to the index; for other update tasks, see the <> option. -[[DocumentsScreen-CSV]] -== CSV +== CSV Documents When using the CSV document type, the functionality is similar to using a requestHandler on the command line. Instead of putting the documents in a curl command, they can instead be input into the Document entry box. The document structure should still be in proper CSV format, with columns delimited and one row per document. 
Then you can choose when documents should be added to the index (Commit Within), and whether existing documents should be overwritten with incoming documents with the same id (if this is not *true*, then the incoming documents will be dropped). -[[DocumentsScreen-DocumentBuilder]] == Document Builder The Document Builder provides a wizard-like interface to enter fields of a document -[[DocumentsScreen-FileUpload]] == File Upload The File Upload option allows choosing a prepared file and uploading it. If using only `/update` for the Request-Handler option, you will be limited to XML, CSV, and JSON. @@ -72,18 +68,16 @@ However, to use the ExtractingRequestHandler (aka Solr Cell), you can modify the Then you can choose when documents should be added to the index (Commit Within), and whether existing documents should be overwritten with incoming documents with the same id (if this is not *true*, then the incoming documents will be dropped). -[[DocumentsScreen-SolrCommand]] == Solr Command The Solr Command option allows you use XML or JSON to perform specific actions on documents, such as defining documents to be added or deleted, updating only certain fields of documents, or commit and optimize commands on the index. The documents should be structured as they would be if using `/update` on the command line. -[[DocumentsScreen-XML]] -== XML +== XML Documents When using the XML document type, the functionality is similar to using a requestHandler on the command line. Instead of putting the documents in a curl command, they can instead be input into the Document entry box. The document structure should still be in proper Solr XML format, with each document separated by `` tags and each field defined. Then you can choose when documents should be added to the index (Commit Within), and whether existing documents should be overwritten with incoming documents with the same id (if this is not **true**, then the incoming documents will be dropped). -This option will only add or overwrite documents to the index; for other update tasks, see the <> option. +This option will only add or overwrite documents to the index; for other update tasks, see the <> option. diff --git a/solr/solr-ref-guide/src/docvalues.adoc b/solr/solr-ref-guide/src/docvalues.adoc index b2debda76b4..2ec3677575b 100644 --- a/solr/solr-ref-guide/src/docvalues.adoc +++ b/solr/solr-ref-guide/src/docvalues.adoc @@ -28,7 +28,6 @@ For other features that we now commonly associate with search, such as sorting, In Lucene 4.0, a new approach was introduced. DocValue fields are now column-oriented fields with a document-to-value mapping built at index time. This approach promises to relieve some of the memory requirements of the fieldCache and make lookups for faceting, sorting, and grouping much faster. -[[DocValues-EnablingDocValues]] == Enabling DocValues To use docValues, you only need to enable it for a field that you will use it with. As with all schema design, you need to define a field type and then define fields of that type with docValues enabled. All of these actions are done in `schema.xml`. @@ -76,7 +75,6 @@ Lucene index back-compatibility is only supported for the default codec. If you If `docValues="true"` for a field, then DocValues will automatically be used any time the field is used for <>, <> or <>. -[[DocValues-RetrievingDocValuesDuringSearch]] === Retrieving DocValues During Search Field values retrieved during search queries are typically returned from stored values. 
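Where the Schema API is preferred over hand-editing `schema.xml`, a docValues-enabled field can also be added with a request along these lines (the collection name, field name, and `string` field type below are placeholder assumptions, not part of the original example):

[source,bash]
----
# Add a docValues-only string field via the Schema API
# (an alternative to editing schema.xml directly)
curl -X POST -H 'Content-type:application/json' \
  http://localhost:8983/solr/mycollection/schema \
  --data-binary '{
    "add-field": {
      "name": "manu_exact",
      "type": "string",
      "indexed": false,
      "stored": false,
      "docValues": true
    }
  }'
----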
However, non-stored docValues fields will be also returned along with other stored fields when all fields (or pattern matching globs) are specified to be returned (e.g. "`fl=*`") for search queries depending on the effective value of the `useDocValuesAsStored` parameter for each field. For schema versions >= 1.6, the implicit default is `useDocValuesAsStored="true"`. See <> & <> for more details. diff --git a/solr/solr-ref-guide/src/enabling-ssl.adoc b/solr/solr-ref-guide/src/enabling-ssl.adoc index be2025ed6f1..5357ab1c61a 100644 --- a/solr/solr-ref-guide/src/enabling-ssl.adoc +++ b/solr/solr-ref-guide/src/enabling-ssl.adoc @@ -24,10 +24,8 @@ This section describes enabling SSL using a self-signed certificate. For background on SSL certificates and keys, see http://www.tldp.org/HOWTO/SSL-Certificates-HOWTO/. -[[EnablingSSL-BasicSSLSetup]] == Basic SSL Setup -[[EnablingSSL-Generateaself-signedcertificateandakey]] === Generate a Self-Signed Certificate and a Key To generate a self-signed certificate and a single key that will be used to authenticate both the server and the client, we'll use the JDK https://docs.oracle.com/javase/8/docs/technotes/tools/unix/keytool.html[`keytool`] command and create a separate keystore. This keystore will also be used as a truststore below. It's possible to use the keystore that comes with the JDK for these purposes, and to use a separate truststore, but those options aren't covered here. @@ -45,7 +43,6 @@ keytool -genkeypair -alias solr-ssl -keyalg RSA -keysize 2048 -keypass secret -s The above command will create a keystore file named `solr-ssl.keystore.jks` in the current directory. -[[EnablingSSL-ConvertthecertificateandkeytoPEMformatforusewithcURL]] === Convert the Certificate and Key to PEM Format for Use with cURL cURL isn't capable of using JKS formatted keystores, so the JKS keystore needs to be converted to PEM format, which cURL understands. @@ -73,7 +70,6 @@ If you want to use cURL on OS X Yosemite (10.10), you'll need to create a certif openssl pkcs12 -nokeys -in solr-ssl.keystore.p12 -out solr-ssl.cacert.pem ---- -[[EnablingSSL-SetcommonSSLrelatedsystemproperties]] === Set Common SSL-Related System Properties The Solr Control Script is already setup to pass SSL-related Java system properties to the JVM. To activate the SSL settings, uncomment and update the set of properties beginning with SOLR_SSL_* in `bin/solr.in.sh`. (or `bin\solr.in.cmd` on Windows). @@ -116,7 +112,6 @@ REM Enable clients to authenticate (but not require) set SOLR_SSL_WANT_CLIENT_AUTH=false ---- -[[EnablingSSL-RunSingleNodeSolrusingSSL]] === Run Single Node Solr using SSL Start Solr using the command shown below; by default clients will not be required to authenticate: @@ -133,12 +128,10 @@ bin/solr -p 8984 bin\solr.cmd -p 8984 ---- -[[EnablingSSL-SolrCloud]] == SSL with SolrCloud This section describes how to run a two-node SolrCloud cluster with no initial collections and a single-node external ZooKeeper. The commands below assume you have already created the keystore described above. -[[EnablingSSL-ConfigureZooKeeper]] === Configure ZooKeeper NOTE: ZooKeeper does not support encrypted communication with clients like Solr. 
There are several related JIRA tickets where SSL support is being planned/worked on: https://issues.apache.org/jira/browse/ZOOKEEPER-235[ZOOKEEPER-235]; https://issues.apache.org/jira/browse/ZOOKEEPER-236[ZOOKEEPER-236]; https://issues.apache.org/jira/browse/ZOOKEEPER-1000[ZOOKEEPER-1000]; and https://issues.apache.org/jira/browse/ZOOKEEPER-2120[ZOOKEEPER-2120]. @@ -161,12 +154,10 @@ server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd clusterprop -n server\scripts\cloud-scripts\zkcli.bat -zkhost localhost:2181 -cmd clusterprop -name urlScheme -val https ---- -If you have set up your ZooKeeper cluster to use a <> , make sure you use the correct `zkhost` string with `zkcli`, e.g. `-zkhost localhost:2181/solr`. +If you have set up your ZooKeeper cluster to use a <> , make sure you use the correct `zkhost` string with `zkcli`, e.g. `-zkhost localhost:2181/solr`. -[[EnablingSSL-RunSolrCloudwithSSL]] === Run SolrCloud with SSL -[[EnablingSSL-CreateSolrhomedirectoriesfortwonodes]] ==== Create Solr Home Directories for Two Nodes Create two copies of the `server/solr/` directory which will serve as the Solr home directories for each of your two SolrCloud nodes: @@ -187,7 +178,6 @@ xcopy /E server\solr cloud\node1\ xcopy /E server\solr cloud\node2\ ---- -[[EnablingSSL-StartthefirstSolrnode]] ==== Start the First Solr Node Next, start the first Solr node on port 8984. Be sure to stop the standalone server first if you started it when working through the previous section on this page. @@ -220,7 +210,6 @@ bin/solr -cloud -s cloud/node1 -z localhost:2181 -p 8984 -Dsolr.ssl.checkPeerNam bin\solr.cmd -cloud -s cloud\node1 -z localhost:2181 -p 8984 -Dsolr.ssl.checkPeerName=false ---- -[[EnablingSSL-StartthesecondSolrnode]] ==== Start the Second Solr Node Finally, start the second Solr node on port 7574 - again, to skip hostname verification, add `-Dsolr.ssl.checkPeerName=false`; @@ -237,14 +226,13 @@ bin/solr -cloud -s cloud/node2 -z localhost:2181 -p 7574 bin\solr.cmd -cloud -s cloud\node2 -z localhost:2181 -p 7574 ---- -[[EnablingSSL-ExampleClientActions]] == Example Client Actions [IMPORTANT] ==== cURL on OS X Mavericks (10.9) has degraded SSL support. For more information and workarounds to allow one-way SSL, see http://curl.haxx.se/mail/archive-2013-10/0036.html. cURL on OS X Yosemite (10.10) is improved - 2-way SSL is possible - see http://curl.haxx.se/mail/archive-2014-10/0053.html . -The cURL commands in the following sections will not work with the system `curl` on OS X Yosemite (10.10). Instead, the certificate supplied with the `-E` param must be in PKCS12 format, and the file supplied with the `--cacert` param must contain only the CA certificate, and no key (see <> for instructions on creating this file): +The cURL commands in the following sections will not work with the system `curl` on OS X Yosemite (10.10). Instead, the certificate supplied with the `-E` param must be in PKCS12 format, and the file supplied with the `--cacert` param must contain only the CA certificate, and no key (see <> for instructions on creating this file): [source,bash] curl -E solr-ssl.keystore.p12:secret --cacert solr-ssl.cacert.pem ... @@ -271,7 +259,6 @@ bin\solr.cmd create -c mycollection -shards 2 The `create` action will pass the `SOLR_SSL_*` properties set in your include file to the SolrJ code used to create the collection. 
-[[EnablingSSL-RetrieveSolrCloudclusterstatususingcURL]] === Retrieve SolrCloud Cluster Status using cURL To get the resulting cluster status (again, if you have not enabled client authentication, remove the `-E solr-ssl.pem:secret` option): @@ -317,7 +304,6 @@ You should get a response that looks like this: "properties":{"urlScheme":"https"}}} ---- -[[EnablingSSL-Indexdocumentsusingpost.jar]] === Index Documents using post.jar Use `post.jar` to index some example documents to the SolrCloud collection created above: @@ -329,7 +315,6 @@ cd example/exampledocs java -Djavax.net.ssl.keyStorePassword=secret -Djavax.net.ssl.keyStore=../../server/etc/solr-ssl.keystore.jks -Djavax.net.ssl.trustStore=../../server/etc/solr-ssl.keystore.jks -Djavax.net.ssl.trustStorePassword=secret -Durl=https://localhost:8984/solr/mycollection/update -jar post.jar *.xml ---- -[[EnablingSSL-QueryusingcURL]] === Query Using cURL Use cURL to query the SolrCloud collection created above, from a directory containing the PEM formatted certificate and key created above (e.g. `example/etc/`) - if you have not enabled client authentication (system property `-Djetty.ssl.clientAuth=true)`, then you can remove the `-E solr-ssl.pem:secret` option: @@ -339,8 +324,7 @@ Use cURL to query the SolrCloud collection created above, from a directory conta curl -E solr-ssl.pem:secret --cacert solr-ssl.pem "https://localhost:8984/solr/mycollection/select?q=*:*&wt=json&indent=on" ---- -[[EnablingSSL-IndexadocumentusingCloudSolrClient]] -=== Index a document using CloudSolrClient +=== Index a Document using CloudSolrClient From a java client using SolrJ, index a document. In the code below, the `javax.net.ssl.*` system properties are set programmatically, but you could instead specify them on the java command line, as in the `post.jar` example above: diff --git a/solr/solr-ref-guide/src/errata.adoc b/solr/solr-ref-guide/src/errata.adoc index 9030ee354ad..7484c1759bd 100644 --- a/solr/solr-ref-guide/src/errata.adoc +++ b/solr/solr-ref-guide/src/errata.adoc @@ -18,14 +18,12 @@ // specific language governing permissions and limitations // under the License. -[[Errata-ErrataForThisDocumentation]] == Errata For This Documentation Any mistakes found in this documentation after its release will be listed on the on-line version of this page: https://lucene.apache.org/solr/guide/{solr-docs-version}/errata.html -[[Errata-ErrataForPastVersionsofThisDocumentation]] == Errata For Past Versions of This Documentation Any known mistakes in past releases of this documentation will be noted below. diff --git a/solr/solr-ref-guide/src/exporting-result-sets.adoc b/solr/solr-ref-guide/src/exporting-result-sets.adoc index 33852fadb1f..0f8866dc8f9 100644 --- a/solr/solr-ref-guide/src/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/src/exporting-result-sets.adoc @@ -25,19 +25,16 @@ This feature uses a stream sorting technique that begins to send records within The cases where this functionality may be useful include: session analysis, distributed merge joins, time series roll-ups, aggregations on high cardinality fields, fully distributed field collapsing, and sort based stats. -[[ExportingResultSets-FieldRequirements]] == Field Requirements All the fields being sorted and exported must have docValues set to true. For more information, see the section on <>. 
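As a quick illustration (not something the export handler requires you to do this way), a field intended for sorting and exporting could be declared with docValues enabled through the Schema API; the collection and field names below are placeholders:

[source,bash]
----
# Hypothetical example: declare an export/sort field with docValues=true via the Schema API
curl -X POST -H 'Content-type:application/json' \
     --data-binary '{"add-field":{"name":"severity","type":"string","docValues":true}}' \
     "http://localhost:8983/solr/core_name/schema"
----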
-[[ExportingResultSets-The_exportRequestHandler]] == The /export RequestHandler The `/export` request handler with the appropriate configuration is one of Solr's out-of-the-box request handlers - see <> for more information. Note that this request handler's properties are defined as "invariants", which means they cannot be overridden by other properties passed at another time (such as at query time). -[[ExportingResultSets-RequestingResultsExport]] == Requesting Results Export You can use `/export` to make requests to export the result set of a query. @@ -53,19 +50,16 @@ Here is an example of an export request of some indexed log data: http://localhost:8983/solr/core_name/export?q=my-query&sort=severity+desc,timestamp+desc&fl=severity,timestamp,msg ---- -[[ExportingResultSets-SpecifyingtheSortCriteria]] === Specifying the Sort Criteria The `sort` property defines how documents will be sorted in the exported result set. Results can be sorted by any field that has a field type of int,long, float, double, string. The sort fields must be single valued fields. Up to four sort fields can be specified per request, with the 'asc' or 'desc' properties. -[[ExportingResultSets-SpecifyingtheFieldList]] === Specifying the Field List The `fl` property defines the fields that will be exported with the result set. Any of the field types that can be sorted (i.e., int, long, float, double, string, date, boolean) can be used in the field list. The fields can be single or multi-valued. However, returning scores and wildcards are not supported at this time. -[[ExportingResultSets-DistributedSupport]] == Distributed Support See the section <> for distributed support. diff --git a/solr/solr-ref-guide/src/faceting.adoc b/solr/solr-ref-guide/src/faceting.adoc index b0a79c0751a..4384a74f462 100644 --- a/solr/solr-ref-guide/src/faceting.adoc +++ b/solr/solr-ref-guide/src/faceting.adoc @@ -21,7 +21,7 @@ Faceting is the arrangement of search results into categories based on indexed terms. -Searchers are presented with the indexed terms, along with numerical counts of how many matching documents were found were each term. Faceting makes it easy for users to explore search results, narrowing in on exactly the results they are looking for. +Searchers are presented with the indexed terms, along with numerical counts of how many matching documents were found for each term. Faceting makes it easy for users to explore search results, narrowing in on exactly the results they are looking for. [[Faceting-GeneralParameters]] == General Parameters @@ -351,7 +351,7 @@ The `facet.mincount` parameter, the same one as used in field faceting is also a [NOTE] ==== -Range faceting on date fields is a common situation where the <> parameter can be useful to ensure that the "facet counts per day" or "facet counts per month" are based on a meaningful definition of when a given day/month "starts" relative to a particular TimeZone. +Range faceting on date fields is a common situation where the <> parameter can be useful to ensure that the "facet counts per day" or "facet counts per month" are based on a meaningful definition of when a given day/month "starts" relative to a particular TimeZone. For more information, see the examples in the <> section. 
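A sketch of such a request is shown below; the field name, date window, and `TZ` value are placeholders chosen only to illustrate per-day buckets computed relative to a time zone:

[source,bash]
----
# Hypothetical per-day range facet computed relative to a specific time zone
curl "http://localhost:8983/solr/core_name/select?q=*:*&rows=0&facet=true&facet.range=mydatefield&facet.range.start=NOW/DAY-30DAYS&facet.range.end=NOW/DAY&facet.range.gap=%2B1DAY&TZ=America/Los_Angeles"
----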
diff --git a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc index 89b8e9062ad..695146b8de0 100644 --- a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc +++ b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc @@ -27,7 +27,6 @@ A field type definition can include four types of information: * If the field type is `TextField`, a description of the field analysis for the field type. * Field type properties - depending on the implementation class, some properties may be mandatory. -[[FieldTypeDefinitionsandProperties-FieldTypeDefinitionsinschema.xml]] == Field Type Definitions in schema.xml Field types are defined in `schema.xml`. Each field type is defined between `fieldType` elements. They can optionally be grouped within a `types` element. Here is an example of a field type definition for a type called `text_general`: @@ -91,9 +90,9 @@ For multivalued fields, specifies a distance between multiple values, which prev `autoGeneratePhraseQueries`:: For text fields. If `true`, Solr automatically generates phrase queries for adjacent terms. If `false`, terms must be enclosed in double-quotes to be treated as phrases. `enableGraphQueries`:: -For text fields, applicable when querying with <>. Use `true` (the default) for field types with query analyzers including graph-aware filters, e.g., <> and <>. +For text fields, applicable when querying with <>. Use `true` (the default) for field types with query analyzers including graph-aware filters, e.g., <> and <>. + -Use `false` for field types with query analyzers including filters that can match docs when some tokens are missing, e.g., <>. +Use `false` for field types with query analyzers including filters that can match docs when some tokens are missing, e.g., <>. [[FieldTypeDefinitionsandProperties-docValuesFormat]] `docValuesFormat`:: @@ -137,9 +136,8 @@ The default values for each property depend on the underlying `FieldType` class, // TODO: SOLR-10655 END -[[FieldTypeDefinitionsandProperties-FieldTypeSimilarity]] == Field Type Similarity A field type may optionally specify a `` that will be used when scoring documents that refer to fields with this type, as long as the "global" similarity for the collection allows it. -By default, any field type which does not define a similarity, uses `BM25Similarity`. For more details, and examples of configuring both global & per-type Similarities, please see <>. +By default, any field type which does not define a similarity, uses `BM25Similarity`. For more details, and examples of configuring both global & per-type Similarities, please see <>. diff --git a/solr/solr-ref-guide/src/field-types-included-with-solr.adoc b/solr/solr-ref-guide/src/field-types-included-with-solr.adoc index 5c82970b777..4ba0e45598c 100644 --- a/solr/solr-ref-guide/src/field-types-included-with-solr.adoc +++ b/solr/solr-ref-guide/src/field-types-included-with-solr.adoc @@ -27,17 +27,17 @@ The following table lists the field types that are available in Solr. The `org.a |Class |Description |BinaryField |Binary data. |BoolField |Contains either true or false. Values of "1", "t", or "T" in the first character are interpreted as true. Any other values in the first character are interpreted as false. -|CollationField |Supports Unicode collation for sorting and range queries. ICUCollationField is a better choice if you can use ICU4J. See the section <>. 
+|CollationField |Supports Unicode collation for sorting and range queries. ICUCollationField is a better choice if you can use ICU4J. See the section <>. |CurrencyField |Deprecated in favor of CurrencyFieldType. |CurrencyFieldType |Supports currencies and exchange rates. See the section <>. |DateRangeField |Supports indexing date ranges, to include point in time date instances as well (single-millisecond durations). See the section <> for more detail on using this field type. Consider using this field type even if it's just for date instances, particularly when the queries typically fall on UTC year/month/day/hour, etc., boundaries. |ExternalFileField |Pulls values from a file on disk. See the section <>. |EnumField |Allows defining an enumerated set of values which may not be easily sorted by either alphabetic or numeric order (such as a list of severities, for example). This field type takes a configuration file, which lists the proper order of the field values. See the section <> for more information. -|ICUCollationField |Supports Unicode collation for sorting and range queries. See the section <>. +|ICUCollationField |Supports Unicode collation for sorting and range queries. See the section <>. |LatLonPointSpatialField |<>: a latitude/longitude coordinate pair; possibly multi-valued for multiple points. Usually it's specified as "lat,lon" order with a comma. |LatLonType |(deprecated) <>: a single-valued latitude/longitude coordinate pair. Usually it's specified as "lat,lon" order with a comma. |PointType |<>: A single-valued n-dimensional point. It's both for sorting spatial data that is _not_ lat-lon, and for some more rare use-cases. (NOTE: this is _not_ related to the "Point" based numeric fields) -|PreAnalyzedField |Provides a way to send to Solr serialized token streams, optionally with independent stored values of a field, and have this information stored and indexed without any additional text processing. Configuration and usage of PreAnalyzedField is documented on the <> page. +|PreAnalyzedField |Provides a way to send to Solr serialized token streams, optionally with independent stored values of a field, and have this information stored and indexed without any additional text processing. Configuration and usage of PreAnalyzedField is documented on the <> page. |RandomSortField |Does not contain a value. Queries that sort on this field type will return results in random order. Use a dynamic field to use this feature. |SpatialRecursivePrefixTreeFieldType |(RPT for short) <>: Accepts latitude comma longitude strings or other shapes in WKT format. |StrField |String (UTF-8 encoded string or Unicode). Strings are intended for small fields and are _not_ tokenized or analyzed in any way. They have a hard limit of slightly less than 32K. diff --git a/solr/solr-ref-guide/src/filter-descriptions.adoc b/solr/solr-ref-guide/src/filter-descriptions.adoc index f428678f894..4ced59e9d37 100644 --- a/solr/solr-ref-guide/src/filter-descriptions.adoc +++ b/solr/solr-ref-guide/src/filter-descriptions.adoc @@ -50,7 +50,6 @@ The following sections describe the filter factories that are included in this r For user tips about Solr's filters, see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters. -[[FilterDescriptions-ASCIIFoldingFilter]] == ASCII Folding Filter This filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the Basic Latin Unicode block (the first 127 ASCII characters) to their ASCII equivalents, if one exists. 
This filter converts characters from the following Unicode blocks: @@ -92,10 +91,9 @@ This filter converts alphabetic, numeric, and symbolic Unicode characters which *Out:* "a" (ASCII character 97) -[[FilterDescriptions-Beider-MorseFilter]] == Beider-Morse Filter -Implements the Beider-Morse Phonetic Matching (BMPM) algorithm, which allows identification of similar names, even if they are spelled differently or in different languages. More information about how this works is available in the section on <>. +Implements the Beider-Morse Phonetic Matching (BMPM) algorithm, which allows identification of similar names, even if they are spelled differently or in different languages. More information about how this works is available in the section on <>. [IMPORTANT] ==== @@ -125,10 +123,9 @@ BeiderMorseFilter changed its behavior in Solr 5.0 due to an update to version 3 ---- -[[FilterDescriptions-ClassicFilter]] == Classic Filter -This filter takes the output of the <> and strips periods from acronyms and "'s" from possessives. +This filter takes the output of the <> and strips periods from acronyms and "'s" from possessives. *Factory class:* `solr.ClassicFilterFactory` @@ -150,7 +147,6 @@ This filter takes the output of the <>. +Collation allows sorting of text in a language-sensitive way. It is usually used for sorting, but can also be used with advanced searches. We've covered this in much more detail in the section on <>. -[[FilterDescriptions-Daitch-MokotoffSoundexFilter]] == Daitch-Mokotoff Soundex Filter Implements the Daitch-Mokotoff Soundex algorithm, which allows identification of similar names, even if they are spelled differently. More information about how this works is available in the section on <>. @@ -207,7 +201,6 @@ Implements the Daitch-Mokotoff Soundex algorithm, which allows identification of ---- -[[FilterDescriptions-DoubleMetaphoneFilter]] == Double Metaphone Filter This filter creates tokens using the http://commons.apache.org/codec/apidocs/org/apache/commons/codec/language/DoubleMetaphone.html[`DoubleMetaphone`] encoding algorithm from commons-codec. For more information, see the <> section. @@ -260,7 +253,6 @@ Discard original token (`inject="false"`). Note that "Kuczewski" has two encodings, which are added at the same position. -[[FilterDescriptions-EdgeN-GramFilter]] == Edge N-Gram Filter This filter generates edge n-gram tokens of sizes within the given range. @@ -327,7 +319,6 @@ A range of 4 to 6. *Out:* "four", "scor", "score", "twen", "twent", "twenty" -[[FilterDescriptions-EnglishMinimalStemFilter]] == English Minimal Stem Filter This filter stems plural English words to their singular form. @@ -352,7 +343,6 @@ This filter stems plural English words to their singular form. *Out:* "dog", "cat" -[[FilterDescriptions-EnglishPossessiveFilter]] == English Possessive Filter This filter removes singular possessives (trailing *'s*) from words. Note that plural possessives, e.g. the *s'* in "divers' snorkels", are not removed by this filter. @@ -377,7 +367,6 @@ This filter removes singular possessives (trailing *'s*) from words. Note that p *Out:* "Man", "dog", "bites", "dogs'", "man" -[[FilterDescriptions-FingerprintFilter]] == Fingerprint Filter This filter outputs a single token which is a concatenation of the sorted and de-duplicated set of input tokens. This can be useful for clustering/linking use cases. 
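If you want to experiment with this filter without editing `schema.xml`, a field type along these lines could be added with the Schema API; the type name and analyzer chain here are merely illustrative, not a recommended configuration:

[source,bash]
----
# Hypothetical field type using FingerprintFilterFactory, added via the Schema API
curl -X POST -H 'Content-type:application/json' \
     --data-binary '{
       "add-field-type": {
         "name": "text_fingerprint",
         "class": "solr.TextField",
         "analyzer": {
           "tokenizer": {"class": "solr.WhitespaceTokenizerFactory"},
           "filters": [
             {"class": "solr.LowerCaseFilterFactory"},
             {"class": "solr.FingerprintFilterFactory", "separator": "_"}
           ]
         }
       }
     }' \
     "http://localhost:8983/solr/core_name/schema"
----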
@@ -406,7 +395,6 @@ This filter outputs a single token which is a concatenation of the sorted and de *Out:* "brown_dog_fox_jumped_lazy_over_quick_the" -[[FilterDescriptions-FlattenGraphFilter]] == Flatten Graph Filter This filter must be included on index-time analyzer specifications that include at least one graph-aware filter, including Synonym Graph Filter and Word Delimiter Graph Filter. @@ -417,7 +405,6 @@ This filter must be included on index-time analyzer specifications that include See the examples below for <> and <>. -[[FilterDescriptions-HunspellStemFilter]] == Hunspell Stem Filter The `Hunspell Stem Filter` provides support for several languages. You must provide the dictionary (`.dic`) and rules (`.aff`) files for each language you wish to use with the Hunspell Stem Filter. You can download those language files http://wiki.services.openoffice.org/wiki/Dictionaries[here]. @@ -456,7 +443,6 @@ Be aware that your results will vary widely based on the quality of the provided *Out:* "jump", "jump", "jump" -[[FilterDescriptions-HyphenatedWordsFilter]] == Hyphenated Words Filter This filter reconstructs hyphenated words that have been tokenized as two tokens because of a line break or other intervening whitespace in the field test. If a token ends with a hyphen, it is joined with the following token and the hyphen is discarded. @@ -483,10 +469,9 @@ Note that for this filter to work properly, the upstream tokenizer must not remo *Out:* "A", "hyphenated", "word" -[[FilterDescriptions-ICUFoldingFilter]] == ICU Folding Filter -This filter is a custom Unicode normalization form that applies the foldings specified in http://www.unicode.org/reports/tr30/tr30-4.html[Unicode Technical Report 30] in addition to the `NFKC_Casefold` normalization form as described in <>. This filter is a better substitute for the combined behavior of the <>, <>, and <>. +This filter is a custom Unicode normalization form that applies the foldings specified in http://www.unicode.org/reports/tr30/tr30-4.html[Unicode Technical Report 30] in addition to the `NFKC_Casefold` normalization form as described in <>. This filter is a better substitute for the combined behavior of the <>, <>, and <>. To use this filter, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. For more information about adding jars, see the section <>. @@ -506,7 +491,6 @@ To use this filter, see `solr/contrib/analysis-extras/README.txt` for instructio For detailed information on this normalization form, see http://www.unicode.org/reports/tr30/tr30-4.html. -[[FilterDescriptions-ICUNormalizer2Filter]] == ICU Normalizer 2 Filter This filter factory normalizes text according to one of five Unicode Normalization Forms as described in http://unicode.org/reports/tr15/[Unicode Standard Annex #15]: @@ -539,7 +523,6 @@ For detailed information about these Unicode Normalization Forms, see http://uni To use this filter, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. -[[FilterDescriptions-ICUTransformFilter]] == ICU Transform Filter This filter applies http://userguide.icu-project.org/transforms/general[ICU Tranforms] to text. This filter supports only ICU System Transforms. Custom rule sets are not supported. @@ -564,7 +547,6 @@ For detailed information about ICU Transforms, see http://userguide.icu-project. 
To use this filter, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. -[[FilterDescriptions-KeepWordFilter]] == Keep Word Filter This filter discards all tokens except those that are listed in the given word list. This is the inverse of the Stop Words Filter. This filter can be useful for building specialized indices for a constrained set of terms. @@ -638,7 +620,6 @@ Using LowerCaseFilterFactory before filtering for keep words, no `ignoreCase` fl *Out:* "happy", "funny" -[[FilterDescriptions-KStemFilter]] == KStem Filter KStem is an alternative to the Porter Stem Filter for developers looking for a less aggressive stemmer. KStem was written by Bob Krovetz, ported to Lucene by Sergio Guzman-Lara (UMASS Amherst). This stemmer is only appropriate for English language text. @@ -663,7 +644,6 @@ KStem is an alternative to the Porter Stem Filter for developers looking for a l *Out:* "jump", "jump", "jump" -[[FilterDescriptions-LengthFilter]] == Length Filter This filter passes tokens whose length falls within the min/max limit specified. All other tokens are discarded. @@ -694,7 +674,6 @@ This filter passes tokens whose length falls within the min/max limit specified. *Out:* "turn", "right" -[[FilterDescriptions-LimitTokenCountFilter]] == Limit Token Count Filter This filter limits the number of accepted tokens, typically useful for index analysis. @@ -726,7 +705,6 @@ By default, this filter ignores any tokens in the wrapped `TokenStream` once the *Out:* "1", "2", "3", "4", "5", "6", "7", "8", "9", "10" -[[FilterDescriptions-LimitTokenOffsetFilter]] == Limit Token Offset Filter This filter limits tokens to those before a configured maximum start character offset. This can be useful to limit highlighting, for example. @@ -758,7 +736,6 @@ By default, this filter ignores any tokens in the wrapped `TokenStream` once the *Out:* "0", "2", "4", "6", "8", "A" -[[FilterDescriptions-LimitTokenPositionFilter]] == Limit Token Position Filter This filter limits tokens to those before a configured maximum token position. @@ -790,7 +767,6 @@ By default, this filter ignores any tokens in the wrapped `TokenStream` once the *Out:* "1", "2", "3" -[[FilterDescriptions-LowerCaseFilter]] == Lower Case Filter Converts any uppercase letters in a token to the equivalent lowercase token. All other characters are left unchanged. @@ -815,10 +791,9 @@ Converts any uppercase letters in a token to the equivalent lowercase token. All *Out:* "down", "with", "camelcase" -[[FilterDescriptions-ManagedStopFilter]] == Managed Stop Filter -This is specialized version of the <> that uses a set of stop words that are <> +This is specialized version of the <> that uses a set of stop words that are <> *Arguments:* @@ -836,12 +811,11 @@ With this configuration the set of words is named "english" and can be managed v ---- -See <> for example input/output. +See <> for example input/output. -[[FilterDescriptions-ManagedSynonymFilter]] == Managed Synonym Filter -This is specialized version of the <> that uses a mapping on synonyms that is <> +This is specialized version of the <> that uses a mapping on synonyms that is <> .Managed Synonym Filter has been Deprecated [WARNING] @@ -851,12 +825,11 @@ Managed Synonym Filter has been deprecated in favor of Managed Synonym Graph Fil *Factory class:* `solr.ManagedSynonymFilterFactory` -For arguments and examples, see the Managed Synonym Graph Filter below. +For arguments and examples, see the <> below. 
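To make the "managed" part concrete, the mappings behind a managed set can be inspected and edited over HTTP. In the sketch below the collection name `techproducts` is only an assumption, while the resource name `english` matches the configuration snippets above; note that a core/collection reload is needed before new mappings take effect.

[source,bash]
----
# List the mappings currently stored in the managed synonym set named "english"
curl "http://localhost:8983/solr/techproducts/schema/analysis/synonyms/english"

# Add a mapping to the managed set (hypothetical collection name)
curl -X PUT -H 'Content-type:application/json' \
     --data-binary '{"mad":["angry","upset"]}' \
     "http://localhost:8983/solr/techproducts/schema/analysis/synonyms/english"
----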
-[[FilterDescriptions-ManagedSynonymGraphFilter]] == Managed Synonym Graph Filter -This is specialized version of the <> that uses a mapping on synonyms that is <> +This is specialized version of the <> that uses a mapping on synonyms that is <> This filter maps single- or multi-token synonyms, producing a fully correct graph output. This filter is a replacement for the Managed Synonym Filter, which produces incorrect graphs for multi-token synonyms. @@ -881,9 +854,8 @@ With this configuration the set of mappings is named "english" and can be manage ---- -See <> for example input/output. +See <> for example input/output. -[[FilterDescriptions-N-GramFilter]] == N-Gram Filter Generates n-gram tokens of sizes in the given range. Note that tokens are ordered by position and then by gram size. @@ -950,7 +922,6 @@ A range of 3 to 5. *Out:* "fou", "four", "our", "sco", "scor", "score", "cor", "core", "ore" -[[FilterDescriptions-NumericPayloadTokenFilter]] == Numeric Payload Token Filter This filter adds a numeric floating point payload value to tokens that match a given type. Refer to the Javadoc for the `org.apache.lucene.analysis.Token` class for more information about token types and payloads. @@ -979,7 +950,6 @@ This filter adds a numeric floating point payload value to tokens that match a g *Out:* "bing"[0.75], "bang"[0.75], "boom"[0.75] -[[FilterDescriptions-PatternReplaceFilter]] == Pattern Replace Filter This filter applies a regular expression to each token and, for those that match, substitutes the given replacement string in place of the matched pattern. Tokens which do not match are passed though unchanged. @@ -1048,7 +1018,6 @@ More complex pattern with capture group reference in the replacement. Tokens tha *Out:* "cat", "foo_1234", "9987", "blah1234foo" -[[FilterDescriptions-PhoneticFilter]] == Phonetic Filter This filter creates tokens using one of the phonetic encoding algorithms in the `org.apache.commons.codec.language` package. For more information, see the section on <>. @@ -1119,7 +1088,6 @@ Default Soundex encoder. *Out:* "four"(1), "F600"(1), "score"(2), "S600"(2), "and"(3), "A530"(3), "twenty"(4), "T530"(4) -[[FilterDescriptions-PorterStemFilter]] == Porter Stem Filter This filter applies the Porter Stemming Algorithm for English. The results are similar to using the Snowball Porter Stemmer with the `language="English"` argument. But this stemmer is coded directly in Java and is not based on Snowball. It does not accept a list of protected words and is only appropriate for English language text. However, it has been benchmarked as http://markmail.org/thread/d2c443z63z37rwf6[four times faster] than the English Snowball stemmer, so can provide a performance enhancement. @@ -1144,7 +1112,6 @@ This filter applies the Porter Stemming Algorithm for English. The results are s *Out:* "jump", "jump", "jump" -[[FilterDescriptions-RemoveDuplicatesTokenFilter]] == Remove Duplicates Token Filter The filter removes duplicate tokens in the stream. Tokens are considered to be duplicates ONLY if they have the same text and position values. @@ -1223,7 +1190,6 @@ This filter reverses tokens to provide faster leading wildcard and prefix querie *Out:* "oof*", "rab*" -[[FilterDescriptions-ShingleFilter]] == Shingle Filter This filter constructs shingles, which are token n-grams, from the token stream. It combines runs of tokens into a single token. @@ -1278,7 +1244,6 @@ A shingle size of four, do not include original token. 
*Out:* "To be"(1), "To be or"(1), "To be or not"(1), "be or"(2), "be or not"(2), "be or not to"(2), "or not"(3), "or not to"(3), "or not to be"(3), "not to"(4), "not to be"(4), "to be"(5) -[[FilterDescriptions-SnowballPorterStemmerFilter]] == Snowball Porter Stemmer Filter This filter factory instantiates a language-specific stemmer generated by Snowball. Snowball is a software package that generates pattern-based word stemmers. This type of stemmer is not as accurate as a table-based stemmer, but is faster and less complex. Table-driven stemmers are labor intensive to create and maintain and so are typically commercial products. @@ -1349,7 +1314,6 @@ Spanish stemmer, Spanish words: *Out:* "cant", "cant" -[[FilterDescriptions-StandardFilter]] == Standard Filter This filter removes dots from acronyms and the substring "'s" from the end of tokens. This filter depends on the tokens being tagged with the appropriate term-type to recognize acronyms and words with apostrophes. @@ -1363,7 +1327,6 @@ This filter removes dots from acronyms and the substring "'s" from the end of to This filter is no longer operational in Solr when the `luceneMatchVersion` (in `solrconfig.xml`) is higher than "3.1". ==== -[[FilterDescriptions-StopFilter]] == Stop Filter This filter discards, or _stops_ analysis of, tokens that are on the given stop words list. A standard stop words list is included in the Solr `conf` directory, named `stopwords.txt`, which is appropriate for typical English language text. @@ -1414,10 +1377,9 @@ Case-sensitive matching, capitalized words not stopped. Token positions skip sto *Out:* "what"(4) -[[FilterDescriptions-SuggestStopFilter]] == Suggest Stop Filter -Like <>, this filter discards, or _stops_ analysis of, tokens that are on the given stop words list. +Like <>, this filter discards, or _stops_ analysis of, tokens that are on the given stop words list. Suggest Stop Filter differs from Stop Filter in that it will not remove the last token unless it is followed by a token separator. For example, a query `"find the"` would preserve the `'the'` since it was not followed by a space, punctuation etc., and mark it as a `KEYWORD` so that following filters will not change or remove it. @@ -1455,7 +1417,6 @@ By contrast, a query like "`find the popsicle`" would remove '`the`' as a stopwo *Out:* "the"(2) -[[FilterDescriptions-SynonymFilter]] == Synonym Filter This filter does synonym mapping. Each token is looked up in the list of synonyms and if a match is found, then the synonym is emitted in place of the token. The position value of the new tokens are set such they all occur at the same position as the original token. @@ -1470,7 +1431,6 @@ Synonym Filter has been deprecated in favor of Synonym Graph Filter, which is re For arguments and examples, see the Synonym Graph Filter below. -[[FilterDescriptions-SynonymGraphFilter]] == Synonym Graph Filter This filter maps single- or multi-token synonyms, producing a fully correct graph output. This filter is a replacement for the Synonym Filter, which produces incorrect graphs for multi-token synonyms. @@ -1542,7 +1502,6 @@ small => tiny,teeny,weeny *Out:* "the"(1), "large"(2), "large"(3), "couch"(4), "sofa"(4), "divan"(4) -[[FilterDescriptions-TokenOffsetPayloadFilter]] == Token Offset Payload Filter This filter adds the numeric character offsets of the token as a payload value for that token. 
@@ -1567,7 +1526,6 @@ This filter adds the numeric character offsets of the token as a payload value f *Out:* "bing"[0,4], "bang"[5,9], "boom"[10,14] -[[FilterDescriptions-TrimFilter]] == Trim Filter This filter trims leading and/or trailing whitespace from tokens. Most tokenizers break tokens at whitespace, so this filter is most often used for special situations. @@ -1596,7 +1554,6 @@ The PatternTokenizerFactory configuration used here splits the input on simple c *Out:* "one", "two", "three", "four" -[[FilterDescriptions-TypeAsPayloadFilter]] == Type As Payload Filter This filter adds the token's type, as an encoded byte sequence, as its payload. @@ -1621,10 +1578,9 @@ This filter adds the token's type, as an encoded byte sequence, as its payload. *Out:* "Pay"[], "Bob's"[], "I.O.U."[] -[[FilterDescriptions-TypeTokenFilter]] == Type Token Filter -This filter blacklists or whitelists a specified list of token types, assuming the tokens have type metadata associated with them. For example, the <> emits "" and "" typed tokens, as well as other types. This filter would allow you to pull out only e-mail addresses from text as tokens, if you wish. +This filter blacklists or whitelists a specified list of token types, assuming the tokens have type metadata associated with them. For example, the <> emits "" and "" typed tokens, as well as other types. This filter would allow you to pull out only e-mail addresses from text as tokens, if you wish. *Factory class:* `solr.TypeTokenFilterFactory` @@ -1645,7 +1601,6 @@ This filter blacklists or whitelists a specified list of token types, assuming t ---- -[[FilterDescriptions-WordDelimiterFilter]] == Word Delimiter Filter This filter splits tokens at word delimiters. @@ -1660,7 +1615,6 @@ Word Delimiter Filter has been deprecated in favor of Word Delimiter Graph Filte For a full description, including arguments and examples, see the Word Delimiter Graph Filter below. -[[FilterDescriptions-WordDelimiterGraphFilter]] == Word Delimiter Graph Filter This filter splits tokens at word delimiters. diff --git a/solr/solr-ref-guide/src/function-queries.adoc b/solr/solr-ref-guide/src/function-queries.adoc index 29cca9cd9bf..5a9f6dfda5a 100644 --- a/solr/solr-ref-guide/src/function-queries.adoc +++ b/solr/solr-ref-guide/src/function-queries.adoc @@ -25,14 +25,13 @@ Function queries are supported by the <> or <> . For example: +* Via an explicit QParser that expects function arguments, such <> or <> . For example: + [source,text] ---- @@ -76,7 +75,6 @@ q=_val_:mynumericfield _val_:"recip(rord(myfield),1,2,3)" Only functions with fast random access are recommended. -[[FunctionQueries-AvailableFunctions]] == Available Functions The table below summarizes the functions available for function queries. @@ -89,7 +87,7 @@ Returns the absolute value of the specified value or function. * `abs(x)` `abs(-5)` === childfield(field) Function -Returns the value of the given field for one of the matched child docs when searching by <>. It can be used only in `sort` parameter. +Returns the value of the given field for one of the matched child docs when searching by <>. It can be used only in `sort` parameter. *Syntax Examples* @@ -149,7 +147,6 @@ You can quote the term if it's more complex, or do parameter substitution for th * `docfreq(text,'solr')` * `...&defType=func` `&q=docfreq(text,$myterm)&myterm=solr` -[[FunctionQueries-field]] === field Function Returns the numeric docValues or indexed value of the field with the specified name. 
In its simplest (single argument) form, this function can only be used on single valued fields, and can be called using the name of the field as a string, or for most conventional field names simply use the field name by itself with out using the `field(...)` syntax. @@ -232,7 +229,7 @@ If the value of `x` does not fall between `min` and `max`, then either the value === max Function Returns the maximum numeric value of multiple nested functions or constants, which are specified as arguments: `max(x,y,...)`. The `max` function can also be useful for "bottoming out" another function or field at some specified constant. -Use the `field(myfield,max)` syntax for <>. +Use the `field(myfield,max)` syntax for <>. *Syntax Example* @@ -248,7 +245,7 @@ Returns the number of documents in the index, including those that are marked as === min Function Returns the minimum numeric value of multiple nested functions of constants, which are specified as arguments: `min(x,y,...)`. The `min` function can also be useful for providing an "upper bound" on a function using a constant. -Use the `field(myfield,min)` <>. +Use the `field(myfield,min)` <>. *Syntax Example* @@ -502,8 +499,6 @@ Returns `true` if any member of the field exists. *Syntax Example* * `if(lt(ms(mydatefield),315569259747),0.8,1)` translates to this pseudocode: `if mydatefield < 315569259747 then 0.8 else 1` - -[[FunctionQueries-ExampleFunctionQueries]] == Example Function Queries To give you a better understanding of how function queries can be used in Solr, suppose an index stores the dimensions in meters x,y,z of some hypothetical boxes with arbitrary names stored in field `boxname`. Suppose we want to search for box matching name `findbox` but ranked according to volumes of boxes. The query parameters would be: @@ -521,7 +516,6 @@ Suppose that you also have a field storing the weight of the box as `weight`. To http://localhost:8983/solr/collection_name/select?q=boxname:findbox _val_:"div(weight,product(x,y,z))"&fl=boxname x y z weight score ---- -[[FunctionQueries-SortByFunction]] == Sort By Function You can sort your query results by the output of a function. For example, to sort results by distance, you could enter: diff --git a/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc b/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc index d5126606549..30dd9b1e24b 100644 --- a/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc +++ b/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc @@ -33,10 +33,8 @@ In this section you will learn how to start a SolrCloud cluster using startup sc This tutorial assumes that you're already familiar with the basics of using Solr. If you need a refresher, please see the <> to get a grounding in Solr concepts. If you load documents as part of that exercise, you should start over with a fresh Solr installation for these SolrCloud tutorials. ==== -[[GettingStartedwithSolrCloud-SolrCloudExample]] == SolrCloud Example -[[GettingStartedwithSolrCloud-InteractiveStartup]] === Interactive Startup The `bin/solr` script makes it easy to get started with SolrCloud as it walks you through the process of launching Solr nodes in cloud mode and adding a collection. 
To get started, simply do: @@ -120,7 +118,6 @@ To stop Solr in SolrCloud mode, you would use the `bin/solr` script and issue th bin/solr stop -all ---- -[[GettingStartedwithSolrCloud-Startingwith-noprompt]] === Starting with -noprompt You can also get SolrCloud started with all the defaults instead of the interactive session using the following command: @@ -130,7 +127,6 @@ You can also get SolrCloud started with all the defaults instead of the interact bin/solr -e cloud -noprompt ---- -[[GettingStartedwithSolrCloud-RestartingNodes]] === Restarting Nodes You can restart your SolrCloud nodes using the `bin/solr` script. For instance, to restart node1 running on port 8983 (with an embedded ZooKeeper server), you would do: @@ -149,7 +145,6 @@ bin/solr restart -c -p 7574 -z localhost:9983 -s example/cloud/node2/solr Notice that you need to specify the ZooKeeper address (`-z localhost:9983`) when starting node2 so that it can join the cluster with node1. -[[GettingStartedwithSolrCloud-Addinganodetoacluster]] === Adding a node to a cluster Adding a node to an existing cluster is a bit advanced and involves a little more understanding of Solr. Once you startup a SolrCloud cluster using the startup scripts, you can add a new node to it by: diff --git a/solr/solr-ref-guide/src/graph-traversal.adoc b/solr/solr-ref-guide/src/graph-traversal.adoc index 007019bb2ac..a23b32e7d52 100644 --- a/solr/solr-ref-guide/src/graph-traversal.adoc +++ b/solr/solr-ref-guide/src/graph-traversal.adoc @@ -31,7 +31,6 @@ The `nodes` function can be combined with the `scoreNodes` function to provide r This document assumes a basic understanding of graph terminology and streaming expressions. You can begin exploring graph traversal concepts with this https://en.wikipedia.org/wiki/Graph_traversal[Wikipedia article]. More details about streaming expressions are available in this Guide, in the section <>. ==== -[[GraphTraversal-BasicSyntax]] == Basic Syntax We'll start with the most basic syntax and slowly build up more complexity. The most basic syntax for `nodes` is: @@ -161,7 +160,6 @@ When scattering both branches and leaves the output would like this: Now the level 0 root node is included in the output. -[[GraphTraversal-Aggregations]] == Aggregations `nodes` also supports aggregations. For example: @@ -182,8 +180,7 @@ Edges are uniqued as part of the traversal so the count will *not* reflect the n The aggregation functions supported are `count(*)`, `sum(field)`, `min(field)`, `max(field)`, and `avg(field)`. The fields being aggregated should be present in the edges collected during the traversal. Later examples (below) will show aggregations can be a powerful tool for providing recommendations and limiting the scope of traversals. -[[GraphTraversal-Nestingnodesfunctions]] -== Nesting nodes functions +== Nesting nodes Functions The `nodes` function can be nested to traverse deeper into the graph. For example: @@ -207,14 +204,12 @@ Put more simply, the inner expression gathers all the people that "\johndoe@apac This construct of nesting `nodes` functions is the basic technique for doing a controlled traversal through the graph. -[[GraphTraversal-CycleDetection]] == Cycle Detection The `nodes` function performs cycle detection across the entire traversal. This ensures that nodes that have already been visited are not traversed again. Cycle detection is important for both limiting the size of traversals and gathering accurate aggregations. 
Without cycle detection the size of the traversal could grow exponentially with each hop in the traversal. With cycle detection only new nodes encountered are traversed. Cycle detection *does not* cross collection boundaries. This is because internally the collection name is part of the node ID. For example the node ID "\johndoe@apache.org", is really `emails/johndoe@apache.org`. When traversing to another collection "\johndoe@apache.org" will be traversed. -[[GraphTraversal-FilteringtheTraversal]] == Filtering the Traversal Each level in the traversal can be filtered with a filter query. For example: @@ -229,7 +224,6 @@ nodes(emails, In the example above only emails that match the filter query will be included in the traversal. Any Solr query can be included here. So you can do fun things like <>, apply any of the available <>, or even write custom query parsers to limit the traversal. -[[GraphTraversal-RootStreams]] == Root Streams Any streaming expression can be used to provide the root nodes for a traversal. For example: @@ -246,7 +240,6 @@ The example above provides the root nodes through a search expression. You can a Notice that the `walk` parameter maps a field from the tuples generated by the inner stream. In this case it maps the `to` field from the inner stream to the `from` field. -[[GraphTraversal-SkippingHighFrequencyNodes]] == Skipping High Frequency Nodes It's often desirable to skip traversing high frequency nodes in the graph. This is similar in nature to a search term stop list. The best way to describe this is through an example use case. @@ -277,7 +270,6 @@ The `nodes` function has the `maxDocFreq` param to allow for filtering out high In the example above, the inner search expression searches the `logs` collection and returning all the articles viewed by "user1". The outer `nodes` expression takes all the articles emitted from the inner search expression and finds all the records in the logs collection for those articles. It then gathers and aggregates the users that have read the articles. The `maxDocFreq` parameter limits the articles returned to those that appear in no more then 10,000 log records (per shard). This guards against returning articles that have been viewed by millions of users. -[[GraphTraversal-TrackingtheTraversal]] == Tracking the Traversal By default the `nodes` function only tracks enough information to do cycle detection. This provides enough information to output the nodes and aggregations in the graph. @@ -298,7 +290,6 @@ nodes(emails, gather="to") ---- -[[GraphTraversal-Cross-CollectionTraversals]] == Cross-Collection Traversals Nested `nodes` functions can operate on different SolrCloud collections. This allow traversals to "walk" from one collection to another to gather nodes. Cycle detection does not cross collection boundaries, so nodes collected in one collection will be traversed in a different collection. This was done deliberately to support cross-collection traversals. Note that the output from a cross-collection traversal will likely contain duplicate nodes with different collection attributes. @@ -320,7 +311,6 @@ nodes(logs, The example above finds all people who sent emails with a body that contains "solr rocks". It then finds all the people these people have emailed. Then it traverses to the logs collection and gathers all the content IDs that these people have edited. 
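As with any streaming expression, a traversal like the one above can be submitted to the `/stream` handler with curl; the host, port, and collection names below simply mirror the examples on this page, and the query term is a placeholder:

[source,bash]
----
# Hypothetical request posting a nodes() traversal to the /stream handler
curl --data-urlencode 'expr=nodes(emails,
                                  search(emails, q="body:solr", fl="from", sort="score desc", rows="20"),
                                  walk="from->from",
                                  gather="to")' \
     "http://localhost:8983/solr/emails/stream"
----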
-[[GraphTraversal-CombiningnodesWithOtherStreamingExpressions]] == Combining nodes With Other Streaming Expressions The `nodes` function can act as both a stream source and a stream decorator. The connection with the wider stream expression library provides tremendous power and flexibility when performing graph traversals. Here is an example of using the streaming expression library to intersect two friend networks: @@ -348,10 +338,8 @@ The `nodes` function can act as both a stream source and a stream decorator. The The example above gathers two separate friend networks, one rooted with "\johndoe@apache.org" and another rooted with "\janedoe@apache.org". The friend networks are then sorted by the `node` field, and intersected. The resulting node set will be the intersection of the two friend networks. -[[GraphTraversal-SampleUseCases]] -== Sample Use Cases +== Sample Use Cases for Graph Traversal -[[GraphTraversal-CalculateMarketBasketCo-occurrence]] === Calculate Market Basket Co-occurrence It is often useful to know which products are most frequently purchased with a particular product. This example uses a simple market basket table (indexed in Solr) to store past shopping baskets. The schema for the table is very simple with each row containing a `basketID` and a `productID`. This can be seen as a graph with each row in the table representing an edge. And it can be traversed very quickly to calculate basket co-occurrence, even when the graph contains billions of edges. @@ -378,15 +366,13 @@ Let's break down exactly what this traversal is doing. In a nutshell this expression finds the products that most frequently co-occur with product "ABC" in past shopping baskets. -[[GraphTraversal-UsingthescoreNodesFunctiontoMakeaRecommendation]] === Using the scoreNodes Function to Make a Recommendation -This use case builds on the market basket example <> that calculates which products co-occur most frequently with productID:ABC. The ranked co-occurrence counts provide candidates for a recommendation. The `scoreNodes` function can be used to score the candidates to find the best recommendation. +This use case builds on the market basket example <> that calculates which products co-occur most frequently with productID:ABC. The ranked co-occurrence counts provide candidates for a recommendation. The `scoreNodes` function can be used to score the candidates to find the best recommendation. Before diving into the syntax of the `scoreNodes` function it's useful to understand why the raw co-occurrence counts may not produce the best recommendation. The reason is that raw co-occurrence counts favor items that occur frequently across all baskets. A better recommendation would find the product that has the most significant relationship with productID ABC. The `scoreNodes` function uses a term frequency-inverse document frequency (TF-IDF) algorithm to find the most significant relationship. -[[GraphTraversal-HowItWorks]] -==== *How It Works* +==== How scoreNodes Works The `scoreNodes` function assigns a score to each node emitted by the nodes expression. By default the `scoreNodes` function uses the `count(*)` aggregation, which is the co-occurrence count, as the TF value. The IDF value for each node is fetched from the collection where the node was gathered. Each node is then scored using the TF*IDF formula, which provides a boost to nodes with a lower frequency across all market baskets. 
@@ -394,8 +380,7 @@ Combining the co-occurrence count with the IDF provides a score that shows how i The `scoreNodes` function adds the score to each node in the `nodeScore` field. -[[GraphTraversal-ExampleSyntax]] -==== *Example Syntax* +==== Example scoreNodes Syntax [source,plain] ---- @@ -417,7 +402,6 @@ This example builds on the earlier example "Calculate market basket co-occurrenc . The `scoreNodes` function then assigns a score to the candidates based on the TF*IDF of each node. . The outer `top` expression selects the highest scoring node. This is the recommendation. -[[GraphTraversal-RecommendContentBasedonCollaborativeFilter]] === Recommend Content Based on Collaborative Filter In this example we'll recommend content for a user based on a collaborative filter. This recommendation is made using log records that contain the `userID` and `articleID` and the action performed. In this scenario each log record can be viewed as an edge in a graph. The userID and articleID are the nodes and the action is an edge property used to filter the traversal. @@ -458,7 +442,6 @@ Note that it skips high frequency nodes using the `maxDocFreq` param to filter o Any article selected in step 1 (user1 reading list), will not appear in this step due to cycle detection. So this step returns the articles read by the users with the most similar readings habits to "user1" that "user1" has not read yet. It also counts the number of times each article has been read across this user group. . The outer `top` expression takes the top articles emitted from step 4. This is the recommendation. -[[GraphTraversal-ProteinPathwayTraversal]] === Protein Pathway Traversal In recent years, scientists have become increasingly able to rationally design drugs that target the mutated proteins, called oncogenes, responsible for some cancers. Proteins typically act through long chains of chemical interactions between multiple proteins, called pathways, and, while the oncogene in the pathway may not have a corresponding drug, another protein in the pathway may. Graph traversal on a protein collection that records protein interactions and drugs may yield possible candidates. (Thanks to Lewis Geer of the NCBI, for providing this example). @@ -481,7 +464,6 @@ Let's break down exactly what this traversal is doing. . The outer `nodes` expression also works with the `proteins` collection. It gathers all the drugs that correspond to proteins emitted from step 1. . Using this stepwise approach you can gather the drugs along the pathway of interactions any number of steps away from the root protein. -[[GraphTraversal-ExportingGraphMLtoSupportGraphVisualization]] == Exporting GraphML to Support Graph Visualization In the examples above, the `nodes` expression was sent to Solr's `/stream` handler like any other streaming expression. This approach outputs the nodes in the same JSON tuple format as other streaming expressions so that it can be treated like any other streaming expression. You can use the `/stream` handler when you need to operate directly on the tuples, such as in the recommendation use cases above. @@ -496,8 +478,7 @@ There are a few things to keep mind when exporting a graph in GraphML: . The `/graph` handler currently accepts an arbitrarily complex streaming expression which includes a `nodes` expression. If the streaming expression doesn't include a `nodes` expression, the `/graph` handler will not properly output GraphML. . 
The `/graph` handler currently accepts a single arbitrarily complex, nested `nodes` expression per request. This means you cannot send in a streaming expression that joins or intersects the node sets from multiple `nodes` expressions. The `/graph` handler does support any level of nesting within a single `nodes` expression. The `/stream` handler does support joining and intersecting node sets, but the `/graph` handler currently does not. -[[GraphTraversal-SampleRequest]] -=== Sample Request +=== Sample GraphML Request [source,bash] ---- @@ -512,7 +493,6 @@ curl --data-urlencode 'expr=nodes(enron_emails, gather="to")' http://localhost:8983/solr/enron_emails/graph ---- -[[GraphTraversal-SampleGraphMLOutput]] === Sample GraphML Output [source,xml] diff --git a/solr/solr-ref-guide/src/hadoop-authentication-plugin.adoc b/solr/solr-ref-guide/src/hadoop-authentication-plugin.adoc index 1c17fbca029..7ed0a156f29 100644 --- a/solr/solr-ref-guide/src/hadoop-authentication-plugin.adoc +++ b/solr/solr-ref-guide/src/hadoop-authentication-plugin.adoc @@ -30,7 +30,7 @@ For some of the authentication schemes (e.g., Kerberos), Solr provides a native There are two plugin classes: -* `HadoopAuthPlugin`: This can be used with standalone Solr as well as Solrcloud with <> for internode communication. +* `HadoopAuthPlugin`: This can be used with standalone Solr as well as Solrcloud with <> for internode communication. * `ConfigurableInternodeAuthHadoopPlugin`: This is an extension of HadoopAuthPlugin that allows you to configure the authentication scheme for internode communication. [TIP] @@ -38,7 +38,6 @@ There are two plugin classes: For most SolrCloud or standalone Solr setups, the `HadoopAuthPlugin` should suffice. ==== -[[HadoopAuthenticationPlugin-PluginConfiguration]] == Plugin Configuration `class`:: @@ -70,11 +69,8 @@ Configures proxy users for the underlying Hadoop authentication mechanism. This `clientBuilderFactory`:: No | The `HttpClientBuilderFactory` implementation used for the Solr internal communication. Only applicable for `ConfigurableInternodeAuthHadoopPlugin`. - -[[HadoopAuthenticationPlugin-ExampleConfigurations]] == Example Configurations -[[HadoopAuthenticationPlugin-KerberosAuthenticationusingHadoopAuthenticationPlugin]] === Kerberos Authentication using Hadoop Authentication Plugin This example lets you configure Solr to use Kerberos Authentication, similar to how you would use the <>. @@ -105,7 +101,6 @@ To setup this plugin, use the following in your `security.json` file. } ---- -[[HadoopAuthenticationPlugin-SimpleAuthenticationwithDelegationTokens]] === Simple Authentication with Delegation Tokens Similar to the previous example, this is an example of setting up a Solr cluster that uses delegation tokens. Refer to the parameters in the Hadoop authentication library's https://hadoop.apache.org/docs/stable/hadoop-auth/Configuration.html[documentation] or refer to the section <> for further details. Please note that this example does not use Kerberos and the requests made to Solr must contain valid delegation tokens. diff --git a/solr/solr-ref-guide/src/highlighting.adoc b/solr/solr-ref-guide/src/highlighting.adoc index b0d094d5971..dbad2d6b22f 100644 --- a/solr/solr-ref-guide/src/highlighting.adoc +++ b/solr/solr-ref-guide/src/highlighting.adoc @@ -24,7 +24,6 @@ The fragments are included in a special section of the query response (the `high Highlighting is extremely configurable, perhaps more than any other part of Solr. 
There are many parameters each for fragment sizing, formatting, ordering, backup/alternate behavior, and more options that are hard to categorize. Nonetheless, highlighting is very simple to use. -[[Highlighting-Usage]] == Usage === Common Highlighter Parameters @@ -36,7 +35,7 @@ Use this parameter to enable or disable highlighting. The default is `false`. If `hl.method`:: The highlighting implementation to use. Acceptable values are: `unified`, `original`, `fastVector`. The default is `original`. + -See the <> section below for more details on the differences between the available highlighters. +See the <> section below for more details on the differences between the available highlighters. `hl.fl`:: Specifies a list of fields to highlight. Accepts a comma- or space-delimited list of fields for which Solr should generate highlighted snippets. @@ -92,7 +91,6 @@ The default is `51200` characters. There are more parameters supported as well depending on the highlighter (via `hl.method`) chosen. -[[Highlighting-HighlightingintheQueryResponse]] === Highlighting in the Query Response In the response to a query, Solr includes highlighting data in a section separate from the documents. It is up to a client to determine how to process this response and display the highlights to users. @@ -136,7 +134,6 @@ Note the two sections `docs` and `highlighting`. The `docs` section contains the The `highlighting` section includes the ID of each document, and the field that contains the highlighted portion. In this example, we used the `hl.fl` parameter to say we wanted query terms highlighted in the "manu" field. When there is a match to the query term in that field, it will be included for each document ID in the list. -[[Highlighting-ChoosingaHighlighter]] == Choosing a Highlighter Solr provides a `HighlightComponent` (a `SearchComponent`) and it's in the default list of components for search handlers. It offers a somewhat unified API over multiple actual highlighting implementations (or simply "highlighters") that do the business of highlighting. @@ -173,7 +170,6 @@ The Unified Highlighter is exclusively configured via search parameters. In cont In addition to further information below, more information can be found in the {solr-javadocs}/solr-core/org/apache/solr/highlight/package-summary.html[Solr javadocs]. -[[Highlighting-SchemaOptionsandPerformanceConsiderations]] === Schema Options and Performance Considerations Fundamental to the internals of highlighting are detecting the _offsets_ of the individual words that match the query. Some of the highlighters can run the stored text through the analysis chain defined in the schema, some can look them up from _postings_, and some can look them up from _term vectors._ These choices have different trade-offs: @@ -198,7 +194,6 @@ This is definitely the fastest option for highlighting wildcard queries on large + This adds substantial weight to the index – similar in size to the compressed stored text. If you are using the Unified Highlighter then this is not a recommended configuration since it's slower and heavier than postings with light term vectors. However, this could make sense if full term vectors are already needed for another use-case. -[[Highlighting-TheUnifiedHighlighter]] == The Unified Highlighter The Unified Highlighter supports these following additional parameters to the ones listed earlier: @@ -243,7 +238,6 @@ Indicates which character to break the text on. 
Use only if you have defined `hl This is useful when the text has already been manipulated in advance to have a special delineation character at desired highlight passage boundaries. This character will still appear in the text as the last character of a passage. -[[Highlighting-TheOriginalHighlighter]] == The Original Highlighter The Original Highlighter supports these following additional parameters to the ones listed earlier: @@ -314,7 +308,6 @@ If this may happen and you know you don't need them for highlighting (i.e. your The Original Highlighter has a plugin architecture that enables new functionality to be registered in `solrconfig.xml`. The "```techproducts```" configset shows most of these settings explicitly. You can use it as a guide to provide your own components to include a `SolrFormatter`, `SolrEncoder`, and `SolrFragmenter.` -[[Highlighting-TheFastVectorHighlighter]] == The FastVector Highlighter The FastVector Highlighter (FVH) can be used in conjunction with the Original Highlighter if not all fields should be highlighted with the FVH. In such a mode, set `hl.method=original` and `f.yourTermVecField.hl.method=fastVector` for all fields that should use the FVH. One annoyance to keep in mind is that the Original Highlighter uses `hl.simple.pre` whereas the FVH (and other highlighters) use `hl.tag.pre`. @@ -349,15 +342,12 @@ The maximum number of phrases to analyze when searching for the highest-scoring `hl.multiValuedSeparatorChar`:: Text to use to separate one value from the next for a multi-valued field. The default is " " (a space). - -[[Highlighting-UsingBoundaryScannerswiththeFastVectorHighlighter]] === Using Boundary Scanners with the FastVector Highlighter The FastVector Highlighter will occasionally truncate highlighted words. To prevent this, implement a boundary scanner in `solrconfig.xml`, then use the `hl.boundaryScanner` parameter to specify the boundary scanner for highlighting. Solr supports two boundary scanners: `breakIterator` and `simple`. -[[Highlighting-ThebreakIteratorBoundaryScanner]] ==== The breakIterator Boundary Scanner The `breakIterator` boundary scanner offers excellent performance right out of the box by taking locale and boundary type into account. In most cases you will want to use the `breakIterator` boundary scanner. To implement the `breakIterator` boundary scanner, add this code to the `highlighting` section of your `solrconfig.xml` file, adjusting the type, language, and country values as appropriate to your application: @@ -375,7 +365,6 @@ The `breakIterator` boundary scanner offers excellent performance right out of t Possible values for the `hl.bs.type` parameter are WORD, LINE, SENTENCE, and CHARACTER. -[[Highlighting-ThesimpleBoundaryScanner]] ==== The simple Boundary Scanner The `simple` boundary scanner scans term boundaries for a specified maximum character value (`hl.bs.maxScan`) and for common delimiters such as punctuation marks (`hl.bs.chars`). 
The `simple` boundary scanner may be useful for some custom To implement the `simple` boundary scanner, add this code to the `highlighting` section of your `solrconfig.xml` file, adjusting the values as appropriate to your application: diff --git a/solr/solr-ref-guide/src/how-solrcloud-works.adoc b/solr/solr-ref-guide/src/how-solrcloud-works.adoc index 519a88897c0..5e364ce6307 100644 --- a/solr/solr-ref-guide/src/how-solrcloud-works.adoc +++ b/solr/solr-ref-guide/src/how-solrcloud-works.adoc @@ -27,13 +27,11 @@ The following sections cover provide general information about how various SolrC If you are already familiar with SolrCloud concepts and basic functionality, you can skip to the section covering <>. -[[HowSolrCloudWorks-KeySolrCloudConcepts]] == Key SolrCloud Concepts A SolrCloud cluster consists of some "logical" concepts layered on top of some "physical" concepts. -[[HowSolrCloudWorks-Logical]] -=== Logical +=== Logical Concepts * A Cluster can host multiple Collections of Solr Documents. * A collection can be partitioned into multiple Shards, which contain a subset of the Documents in the Collection. @@ -41,8 +39,7 @@ A SolrCloud cluster consists of some "logical" concepts layered on top of some " ** The theoretical limit to the number of Documents that Collection can reasonably contain. ** The amount of parallelization that is possible for an individual search request. -[[HowSolrCloudWorks-Physical]] -=== Physical +=== Physical Concepts * A Cluster is made up of one or more Solr Nodes, which are running instances of the Solr server process. * Each Node can host multiple Cores. diff --git a/solr/solr-ref-guide/src/implicit-requesthandlers.adoc b/solr/solr-ref-guide/src/implicit-requesthandlers.adoc index 3c87f8c9554..c10d93bc0d9 100644 --- a/solr/solr-ref-guide/src/implicit-requesthandlers.adoc +++ b/solr/solr-ref-guide/src/implicit-requesthandlers.adoc @@ -20,7 +20,6 @@ Solr ships with many out-of-the-box RequestHandlers, which are called implicit because they are not configured in `solrconfig.xml`. -[[ImplicitRequestHandlers-ListofImplicitlyAvailableEndpoints]] == List of Implicitly Available Endpoints // TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed @@ -44,19 +43,18 @@ Solr ships with many out-of-the-box RequestHandlers, which are called implicit b |`/debug/dump` |{solr-javadocs}/solr-core/org/apache/solr/handler/DumpRequestHandler.html[DumpRequestHandler] |`_DEBUG_DUMP` |Echo the request contents back to the client. |<> |{solr-javadocs}/solr-core/org/apache/solr/handler/component/SearchHandler.html[SearchHandler] |`_EXPORT` |Export full sorted result sets. |<> |{solr-javadocs}/solr-core/org/apache/solr/handler/RealTimeGetHandler.html[RealTimeGetHandler] |`_GET` |Real-time get: low-latency retrieval of the latest version of a document. -|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/GraphHandler.html[GraphHandler] |`_ADMIN_GRAPH` |Return http://graphml.graphdrawing.org/[GraphML] formatted output from a <>. +|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/GraphHandler.html[GraphHandler] |`_ADMIN_GRAPH` |Return http://graphml.graphdrawing.org/[GraphML] formatted output from a <>. |<> |{solr-javadocs}/solr-core/org/apache/solr/handler/ReplicationHandler.html[ReplicationHandler] |`_REPLICATION` |Replicate indexes for SolrCloud recovery and Master/Slave index distribution. 
|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/SchemaHandler.html[SchemaHandler] |`_SCHEMA` |Retrieve/modify Solr schema. |<> |{solr-javadocs}/solr-core/org/apache/solr/handler/SQLHandler.html[SQLHandler] |`_SQL` |Front end of the Parallel SQL interface. -|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/StreamHandler.html[StreamHandler] |`_STREAM` |Distributed stream processing. -|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/component/SearchHandler.html[SearchHandler] |`_TERMS` |Return a field's indexed terms and the number of documents containing each term. +|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/StreamHandler.html[StreamHandler] |`_STREAM` |Distributed stream processing. +|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/component/SearchHandler.html[SearchHandler] |`_TERMS` |Return a field's indexed terms and the number of documents containing each term. |<> |{solr-javadocs}/solr-core/org/apache/solr/handler/UpdateRequestHandler.html[UpdateRequestHandler] |`_UPDATE` |Add, delete and update indexed documents formatted as SolrXML, CSV, SolrJSON or javabin. -|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/UpdateRequestHandler.html[UpdateRequestHandler] |`_UPDATE_CSV` |Add and update CSV-formatted documents. -|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/UpdateRequestHandler.html[UpdateRequestHandler] |`_UPDATE_JSON` |Add, delete and update SolrJSON-formatted documents. +|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/UpdateRequestHandler.html[UpdateRequestHandler] |`_UPDATE_CSV` |Add and update CSV-formatted documents. +|<> |{solr-javadocs}/solr-core/org/apache/solr/handler/UpdateRequestHandler.html[UpdateRequestHandler] |`_UPDATE_JSON` |Add, delete and update SolrJSON-formatted documents. |<> |{solr-javadocs}/solr-core/org/apache/solr/handler/UpdateRequestHandler.html[UpdateRequestHandler] |`_UPDATE_JSON_DOCS` |Add and update custom JSON-formatted documents. |=== -[[ImplicitRequestHandlers-HowtoViewtheConfiguration]] == How to View the Configuration You can see configuration for all request handlers, including the implicit request handlers, via the <>. E.g. for the `gettingstarted` collection: @@ -71,7 +69,6 @@ To include the expanded paramset in the response, as well as the effective param `curl "http://localhost:8983/solr/gettingstarted/config/requestHandler?componentName=/export&expandParams=true"` -[[ImplicitRequestHandlers-HowtoEdittheConfiguration]] == How to Edit the Configuration Because implicit request handlers are not present in `solrconfig.xml`, configuration of their associated `default`, `invariant` and `appends` parameters may be edited via<> using the paramset listed in the above table. However, other parameters, including SearchHandler components, may not be modified. The invariants and appends specified in the implicit configuration cannot be overridden. diff --git a/solr/solr-ref-guide/src/index-replication.adoc b/solr/solr-ref-guide/src/index-replication.adoc index 774b78cde75..8c5134154b1 100644 --- a/solr/solr-ref-guide/src/index-replication.adoc +++ b/solr/solr-ref-guide/src/index-replication.adoc @@ -26,7 +26,6 @@ The figure below shows a Solr configuration using index replication. 
The master image::images/index-replication/worddav2b7e14725d898b4104cdd9c502fc77cd.png[image,width=159,height=235] -[[IndexReplication-IndexReplicationinSolr]] == Index Replication in Solr Solr includes a Java implementation of index replication that works over HTTP: @@ -46,7 +45,6 @@ Although there is no explicit concept of "master/slave" nodes in a <> parameters corresponding with any request parameters supported by the `ReplicationHandler` when <>. +* Similar to most other request handlers in Solr you may configure a set of <> parameters corresponding with any request parameters supported by the `ReplicationHandler` when <>. -[[IndexReplication-ConfiguringtheReplicationRequestHandleronaMasterServer]] === Configuring the Replication RequestHandler on a Master Server Before running a replication, you should set the following parameters on initialization of the handler: @@ -125,7 +121,6 @@ The example below shows a possible 'master' configuration for the `ReplicationHa ---- -[[IndexReplication-Replicatingsolrconfig.xml]] ==== Replicating solrconfig.xml In the configuration file on the master server, include a line like the following: @@ -139,7 +134,6 @@ This ensures that the local configuration `solrconfig_slave.xml` will be saved a On the master server, the file name of the slave configuration file can be anything, as long as the name is correctly identified in the `confFiles` string; then it will be saved as whatever file name appears after the colon ':'. -[[IndexReplication-ConfiguringtheReplicationRequestHandleronaSlaveServer]] === Configuring the Replication RequestHandler on a Slave Server The code below shows how to configure a ReplicationHandler on a slave. @@ -188,7 +182,6 @@ The code below shows how to configure a ReplicationHandler on a slave. ---- -[[IndexReplication-SettingUpaRepeaterwiththeReplicationHandler]] == Setting Up a Repeater with the ReplicationHandler A master may be able to serve only so many slaves without affecting performance. Some organizations have deployed slave servers across multiple data centers. If each slave downloads the index from a remote data center, the resulting download may consume too much network bandwidth. To avoid performance degradation in cases like this, you can configure one or more slaves as repeaters. A repeater is simply a node that acts as both a master and a slave. @@ -213,7 +206,6 @@ Here is an example of a ReplicationHandler configuration for a repeater: ---- -[[IndexReplication-CommitandOptimizeOperations]] == Commit and Optimize Operations When a commit or optimize operation is performed on the master, the RequestHandler reads the list of file names which are associated with each commit point. This relies on the `replicateAfter` parameter in the configuration to decide which types of events should trigger replication. @@ -233,7 +225,6 @@ The `replicateAfter` parameter can accept multiple arguments. For example: optimize ---- -[[IndexReplication-SlaveReplication]] == Slave Replication The master is totally unaware of the slaves. @@ -246,7 +237,6 @@ The slave continuously keeps polling the master (depending on the `pollInterval` * After the download completes, all the new files are moved to the live index directory and the file's timestamp is same as its counterpart on the master. * A commit command is issued on the slave by the Slave's ReplicationHandler and the new index is loaded. 
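The polling behavior described above can be inspected or paused from the command line with the ReplicationHandler's HTTP API, which is documented in full later on this page. A minimal sketch (the host, port, and core name `techproducts` are assumptions for a local test install):

[source,bash]
----
# Show the slave's replication state: the master it polls, index version, and last replication time
curl "http://localhost:8983/solr/techproducts/replication?command=details&wt=json"

# Temporarily stop polling the master; resume later with command=enablepoll
curl "http://localhost:8983/solr/techproducts/replication?command=disablepoll"
----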
-[[IndexReplication-ReplicatingConfigurationFiles]] === Replicating Configuration Files To replicate configuration files, list them using using the `confFiles` parameter. Only files found in the `conf` directory of the master's Solr instance will be replicated. @@ -259,7 +249,6 @@ As a precaution when replicating configuration files, Solr copies configuration If a replication involved downloading of at least one configuration file, the ReplicationHandler issues a core-reload command instead of a commit command. -[[IndexReplication-ResolvingCorruptionIssuesonSlaveServers]] === Resolving Corruption Issues on Slave Servers If documents are added to the slave, then the slave is no longer in sync with its master. However, the slave will not undertake any action to put itself in sync, until the master has new index data. @@ -268,7 +257,6 @@ When a commit operation takes place on the master, the index version of the mast To correct this problem, the slave then copies all the index files from master to a new index directory and asks the core to load the fresh index from the new directory. -[[IndexReplication-HTTPAPICommandsfortheReplicationHandler]] == HTTP API Commands for the ReplicationHandler You can use the HTTP commands below to control the ReplicationHandler's operations. @@ -355,7 +343,6 @@ There are two supported parameters: * `location`: Location where the snapshot is created. -[[IndexReplication-DistributionandOptimization]] == Distribution and Optimization Optimizing an index is not something most users should generally worry about - but in particular users should be aware of the impacts of optimizing an index when using the `ReplicationHandler`. diff --git a/solr/solr-ref-guide/src/indexconfig-in-solrconfig.adoc b/solr/solr-ref-guide/src/indexconfig-in-solrconfig.adoc index 63ab26dda8a..a592a2daf02 100644 --- a/solr/solr-ref-guide/src/indexconfig-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/indexconfig-in-solrconfig.adoc @@ -29,10 +29,8 @@ By default, the settings are commented out in the sample `solrconfig.xml` includ ---- -[[IndexConfiginSolrConfig-WritingNewSegments]] == Writing New Segments -[[IndexConfiginSolrConfig-ramBufferSizeMB]] === ramBufferSizeMB Once accumulated document updates exceed this much memory space (defined in megabytes), then the pending updates are flushed. This can also create new segments or trigger a merge. Using this setting is generally preferable to `maxBufferedDocs`. If both `maxBufferedDocs` and `ramBufferSizeMB` are set in `solrconfig.xml`, then a flush will occur when either limit is reached. The default is 100Mb. @@ -42,7 +40,6 @@ Once accumulated document updates exceed this much memory space (defined in mega 100 ---- -[[IndexConfiginSolrConfig-maxBufferedDocs]] === maxBufferedDocs Sets the number of document updates to buffer in memory before they are flushed as a new segment. This may also trigger a merge. The default Solr configuration sets to flush by RAM usage (`ramBufferSizeMB`). @@ -52,20 +49,17 @@ Sets the number of document updates to buffer in memory before they are flushed 1000 ---- -[[IndexConfiginSolrConfig-useCompoundFile]] === useCompoundFile -Controls whether newly written (and not yet merged) index segments should use the <> format. The default is false. +Controls whether newly written (and not yet merged) index segments should use the <> format. The default is false. 
[source,xml] ---- false ---- -[[IndexConfiginSolrConfig-MergingIndexSegments]] == Merging Index Segments -[[IndexConfiginSolrConfig-mergePolicyFactory]] === mergePolicyFactory Defines how merging segments is done. @@ -99,7 +93,6 @@ Choosing the best merge factors is generally a trade-off of indexing speed vs. s Conversely, keeping more segments can accelerate indexing, because merges happen less often, making an update is less likely to trigger a merge. But searches become more computationally expensive and will likely be slower, because search terms must be looked up in more index segments. Faster index updates also means shorter commit turnaround times, which means more timely search results. -[[IndexConfiginSolrConfig-CustomizingMergePolicies]] === Customizing Merge Policies If the configuration options for the built-in merge policies do not fully suit your use case, you can customize them: either by creating a custom merge policy factory that you specify in your configuration, or by configuring a {solr-javadocs}/solr-core/org/apache/solr/index/WrapperMergePolicyFactory.html[merge policy wrapper] which uses a `wrapped.prefix` configuration option to control how the factory it wraps will be configured: @@ -117,7 +110,6 @@ If the configuration options for the built-in merge policies do not fully suit y The example above shows Solr's {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] being configured to sort documents in merged segments by `"timestamp desc"`, and wrapped around a `TieredMergePolicyFactory` configured to use the values `maxMergeAtOnce=10` and `segmentsPerTier=10` via the `inner` prefix defined by `SortingMergePolicyFactory` 's `wrapped.prefix` option. For more information on using `SortingMergePolicyFactory`, see <>. -[[IndexConfiginSolrConfig-mergeScheduler]] === mergeScheduler The merge scheduler controls how merges are performed. The default `ConcurrentMergeScheduler` performs merges in the background using separate threads. The alternative, `SerialMergeScheduler`, does not perform merges with separate threads. @@ -127,7 +119,6 @@ The merge scheduler controls how merges are performed. The default `ConcurrentMe ---- -[[IndexConfiginSolrConfig-mergedSegmentWarmer]] === mergedSegmentWarmer When using Solr in for <> a merged segment warmer can be configured to warm the reader on the newly merged segment, before the merge commits. This is not required for near real-time search, but will reduce search latency on opening a new near real-time reader after a merge completes. @@ -137,7 +128,6 @@ When using Solr in for < ---- -[[IndexConfiginSolrConfig-CompoundFileSegments]] == Compound File Segments Each Lucene segment is typically comprised of a dozen or so files. Lucene can be configured to bundle all of the files for a segment into a single compound file using a file extension of `.cfs`; it's an abbreviation for Compound File Segment. @@ -149,16 +139,14 @@ On systems where the number of open files allowed per process is limited, CFS ma .CFS: New Segments vs Merged Segments [NOTE] ==== -To configure whether _newly written segments_ should use CFS, see the <> setting described above. To configure whether _merged segments_ use CFS, review the Javadocs for your <> . +To configure whether _newly written segments_ should use CFS, see the <> setting described above. To configure whether _merged segments_ use CFS, review the Javadocs for your <> . 
-Many <> implementations support `noCFSRatio` and `maxCFSSegmentSizeMB` settings with default values that prevent compound files from being used for large segments, but do use compound files for small segments. +Many <> implementations support `noCFSRatio` and `maxCFSSegmentSizeMB` settings with default values that prevent compound files from being used for large segments, but do use compound files for small segments. ==== -[[IndexConfiginSolrConfig-IndexLocks]] == Index Locks -[[IndexConfiginSolrConfig-lockType]] === lockType The LockFactory options specify the locking implementation to use. @@ -177,7 +165,6 @@ For more information on the nuances of each LockFactory, see http://wiki.apache. native ---- -[[IndexConfiginSolrConfig-writeLockTimeout]] === writeLockTimeout The maximum time to wait for a write lock on an IndexWriter. The default is 1000, expressed in milliseconds. @@ -187,7 +174,6 @@ The maximum time to wait for a write lock on an IndexWriter. The default is 1000 1000 ---- -[[IndexConfiginSolrConfig-OtherIndexingSettings]] == Other Indexing Settings There are a few other parameters that may be important to configure for your implementation. These settings affect how or when updates are made to an index. diff --git a/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc b/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc index 932ac8e6627..ece3989a6e6 100644 --- a/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc +++ b/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc @@ -43,7 +43,6 @@ This section describes how Solr adds data to its index. It covers the following * *<>*: Information about integrating Solr with Apache's Unstructured Information Management Architecture (UIMA). UIMA lets you define custom pipelines of Analysis Engines that incrementally add metadata to your documents as annotations. -[[IndexingandBasicDataOperations-IndexingUsingClientAPIs]] == Indexing Using Client APIs Using client APIs, such as <>, from your applications is an important option for updating Solr indexes. See the <> section for more information. diff --git a/solr/solr-ref-guide/src/initparams-in-solrconfig.adoc b/solr/solr-ref-guide/src/initparams-in-solrconfig.adoc index 1120e4332d6..180f424f3bf 100644 --- a/solr/solr-ref-guide/src/initparams-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/initparams-in-solrconfig.adoc @@ -55,8 +55,7 @@ For example, if an `` section has the name "myParams", you can call [source,xml] -[[InitParamsinSolrConfig-Wildcards]] -== Wildcards +== Wildcards in initParams An `` section can support wildcards to define nested paths that should use the parameters defined. A single asterisk (\*) denotes that a nested path one level deeper should use the parameters. Double asterisks (**) denote all nested paths no matter how deep should use the parameters. diff --git a/solr/solr-ref-guide/src/introduction-to-solr-indexing.adoc b/solr/solr-ref-guide/src/introduction-to-solr-indexing.adoc index 888d8db9b1a..83a9378145b 100644 --- a/solr/solr-ref-guide/src/introduction-to-solr-indexing.adoc +++ b/solr/solr-ref-guide/src/introduction-to-solr-indexing.adoc @@ -38,12 +38,10 @@ If the field name is defined in the Schema that is associated with the index, th For more information on indexing in Solr, see the https://wiki.apache.org/solr/FrontPage[Solr Wiki]. 
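As a quick, concrete illustration of the indexing flow described above, the sketch below posts a single JSON document to the update handler and commits it. The collection name `gettingstarted` and the field values are assumptions for a local test instance, not part of this guide's examples.

[source,bash]
----
# Send one document to the UpdateRequestHandler as JSON and make it searchable immediately
curl -X POST -H 'Content-Type: application/json' \
  'http://localhost:8983/solr/gettingstarted/update?commit=true' \
  -d '[{"id": "doc1", "title": "A first test document"}]'
----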
-[[IntroductiontoSolrIndexing-TheSolrExampleDirectory]] == The Solr Example Directory When starting Solr with the "-e" option, the `example/` directory will be used as the base directory for the example Solr instances that are created. This directory also includes an `example/exampledocs/` subdirectory containing sample documents in a variety of formats that you can use to experiment with indexing into the various examples. -[[IntroductiontoSolrIndexing-ThecurlUtilityforTransferringFiles]] == The curl Utility for Transferring Files Many of the instructions and examples in this section make use of the `curl` utility for transferring content through a URL. `curl` posts and retrieves data over HTTP, FTP, and many other protocols. Most Linux distributions include a copy of `curl`. You'll find curl downloads for Linux, Windows, and many other operating systems at http://curl.haxx.se/download.html. Documentation for `curl` is available here: http://curl.haxx.se/docs/manpage.html. diff --git a/solr/solr-ref-guide/src/jvm-settings.adoc b/solr/solr-ref-guide/src/jvm-settings.adoc index 56560da8a66..532e1a7c6bd 100644 --- a/solr/solr-ref-guide/src/jvm-settings.adoc +++ b/solr/solr-ref-guide/src/jvm-settings.adoc @@ -24,7 +24,6 @@ Configuring your JVM can be a complex topic and a full discussion is beyond the For more general information about improving Solr performance, see https://wiki.apache.org/solr/SolrPerformanceFactors. -[[JVMSettings-ChoosingMemoryHeapSettings]] == Choosing Memory Heap Settings The most important JVM configuration settings are those that determine the amount of memory it is allowed to allocate. There are two primary command-line options that set memory limits for the JVM. These are `-Xms`, which sets the initial size of the JVM's memory heap, and `-Xmx`, which sets the maximum size to which the heap is allowed to grow. @@ -41,12 +40,10 @@ When setting the maximum heap size, be careful not to let the JVM consume all av On systems with many CPUs/cores, it can also be beneficial to tune the layout of the heap and/or the behavior of the garbage collector. Adjusting the relative sizes of the generational pools in the heap can affect how often GC sweeps occur and whether they run concurrently. Configuring the various settings of how the garbage collector should behave can greatly reduce the overall performance impact when it does run. There is a lot of good information on this topic available on Sun's website. A good place to start is here: http://www.oracle.com/technetwork/java/javase/tech/index-jsp-140228.html[Oracle's Java HotSpot Garbage Collection]. -[[JVMSettings-UsetheServerHotSpotVM]] == Use the Server HotSpot VM If you are using Sun's JVM, add the `-server` command-line option when you start Solr. This tells the JVM that it should optimize for a long-running server process. If the Java runtime on your system is a JRE, rather than a full JDK distribution (including `javac` and other development tools), then it is possible that it may not support the `-server` JVM option. Test this by running `java -help` and looking for `-server` as an available option in the displayed usage message. -[[JVMSettings-CheckingJVMSettings]] == Checking JVM Settings A great way to see what JVM settings your server is using, along with other useful information, is to use the admin RequestHandler, `solr/admin/system`. This request handler will display a wealth of server statistics and settings.
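The same information is easy to pull from the command line. A sketch, assuming a node listening on the default port 8983; the path below is the node-level variant of the handler mentioned above, and the `jvm` section of the response shows the heap sizes and JVM arguments actually in effect:

[source,bash]
----
# Dump server statistics and settings; look at the "jvm" section for -Xms/-Xmx and other arguments
curl "http://localhost:8983/solr/admin/info/system?wt=json"
----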
diff --git a/solr/solr-ref-guide/src/kerberos-authentication-plugin.adoc b/solr/solr-ref-guide/src/kerberos-authentication-plugin.adoc index da963166f37..a7fa9c181de 100644 --- a/solr/solr-ref-guide/src/kerberos-authentication-plugin.adoc +++ b/solr/solr-ref-guide/src/kerberos-authentication-plugin.adoc @@ -29,17 +29,14 @@ Support for the Kerberos authentication plugin is available in SolrCloud mode or If you are using Solr with a Hadoop cluster secured with Kerberos and intend to store your Solr indexes in HDFS, also see the section <> for additional steps to configure Solr for that purpose. The instructions on this page apply only to scenarios where Solr will be secured with Kerberos. If you only need to store your indexes in a Kerberized HDFS system, please see the other section referenced above. ==== -[[KerberosAuthenticationPlugin-HowSolrWorksWithKerberos]] == How Solr Works With Kerberos When setting up Solr to use Kerberos, configurations are put in place for Solr to use a _service principal_, or a Kerberos username, which is registered with the Key Distribution Center (KDC) to authenticate requests. The configurations define the service principal name and the location of the keytab file that contains the credentials. -[[KerberosAuthenticationPlugin-security.json]] === security.json The Solr authentication model uses a file called `security.json`. A description of this file and how it is created and maintained is covered in the section <>. If this file is created after an initial startup of Solr, a restart of each node of the system is required. -[[KerberosAuthenticationPlugin-ServicePrincipalsandKeytabFiles]] === Service Principals and Keytab Files Each Solr node must have a service principal registered with the Key Distribution Center (KDC). The Kerberos plugin uses SPNego to negotiate authentication. @@ -56,7 +53,6 @@ Along with the service principal, each Solr node needs a keytab file which shoul Since a Solr cluster requires internode communication, each node must also be able to make Kerberos enabled requests to other nodes. By default, Solr uses the same service principal and keytab as a 'client principal' for internode communication. You may configure a distinct client principal explicitly, but doing so is not recommended and is not covered in the examples below. -[[KerberosAuthenticationPlugin-KerberizedZooKeeper]] === Kerberized ZooKeeper When setting up a kerberized SolrCloud cluster, it is recommended to enable Kerberos security for ZooKeeper as well. @@ -65,15 +61,13 @@ In such a setup, the client principal used to authenticate requests with ZooKeep See the <> section below for an example of starting ZooKeeper in Kerberos mode. -[[KerberosAuthenticationPlugin-BrowserConfiguration]] === Browser Configuration In order for your browser to access the Solr Admin UI after enabling Kerberos authentication, it must be able to negotiate with the Kerberos authenticator service to allow you access. Each browser supports this differently, and some (like Chrome) do not support it at all. If you see 401 errors when trying to access the Solr Admin UI after enabling Kerberos authentication, it's likely your browser has not been configured properly to know how or where to negotiate the authentication request. Detailed information on how to set up your browser is beyond the scope of this documentation; please see your system administrators for Kerberos for details on how to configure your browser. 
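Before spending time on browser settings, it can be worth confirming that SPNEGO negotiation works outside the browser at all. The same `curl --negotiate` technique shown in the Test the Configuration section later on this page can be used for this; the host and port below are assumptions, and a valid ticket from `kinit` is required first:

[source,bash]
----
# After kinit, let curl negotiate Kerberos authentication; a successful response means the
# server-side setup is fine and any remaining 401s are a browser configuration issue
curl --negotiate -u : "http://localhost:8983/solr/"
----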
-[[KerberosAuthenticationPlugin-PluginConfiguration]] -== Plugin Configuration +== Kerberos Authentication Configuration .Consult Your Kerberos Admins! [WARNING] @@ -97,7 +91,6 @@ We'll walk through each of these steps below. To use host names instead of IP addresses, use the `SOLR_HOST` configuration in `bin/solr.in.sh` or pass a `-Dhost=` system parameter during Solr startup. This guide uses IP addresses. If you specify a hostname, replace all the IP addresses in the guide with the Solr hostname as appropriate. ==== -[[KerberosAuthenticationPlugin-GetServicePrincipalsandKeytabs]] === Get Service Principals and Keytabs Before configuring Solr, make sure you have a Kerberos service principal for each Solr host and ZooKeeper (if ZooKeeper has not already been configured) available in the KDC server, and generate a keytab file as shown below. @@ -128,7 +121,6 @@ Copy the keytab file from the KDC server’s `/tmp/107.keytab` location to the S You might need to take similar steps to create a ZooKeeper service principal and keytab if it has not already been set up. In that case, the example below shows a different service principal for ZooKeeper, so the above might be repeated with `zookeeper/host1` as the service principal for one of the nodes -[[KerberosAuthenticationPlugin-ZooKeeperConfiguration]] === ZooKeeper Configuration If you are using a ZooKeeper that has already been configured to use Kerberos, you can skip the ZooKeeper-related steps shown here. @@ -173,7 +165,6 @@ Once all of the pieces are in place, start ZooKeeper with the following paramete bin/zkServer.sh start -Djava.security.auth.login.config=/etc/zookeeper/conf/jaas-client.conf ---- -[[KerberosAuthenticationPlugin-Createsecurity.json]] === Create security.json Create the `security.json` file. @@ -194,7 +185,6 @@ More details on how to use a `/security.json` file in Solr are available in the If you already have a `/security.json` file in ZooKeeper, download the file, add or modify the authentication section and upload it back to ZooKeeper using the <> available in Solr. ==== -[[KerberosAuthenticationPlugin-DefineaJAASConfigurationFile]] === Define a JAAS Configuration File The JAAS configuration file defines the properties to use for authentication, such as the service principal and the location of the keytab file. Other properties can also be set to ensure ticket caching and other features. @@ -227,7 +217,6 @@ The main properties we are concerned with are the `keyTab` and `principal` prope * `debug`: this boolean property will output debug messages for help in troubleshooting. * `principal`: the name of the service principal to be used. -[[KerberosAuthenticationPlugin-SolrStartupParameters]] === Solr Startup Parameters While starting up Solr, the following host-specific parameters need to be passed. These parameters can be passed at the command line with the `bin/solr` start command (see <> for details on how to pass system parameters) or defined in `bin/solr.in.sh` or `bin/solr.in.cmd` as appropriate for your operating system. @@ -252,7 +241,6 @@ The app name (section name) within the JAAS configuration file which is required `java.security.auth.login.config`:: Path to the JAAS configuration file for configuring a Solr client for internode communication. This parameter is required. - Here is an example that could be added to `bin/solr.in.sh`. Make sure to change this example to use the right hostname and the keytab file path. 
[source,bash] @@ -273,7 +261,6 @@ For Java 1.8, this is available here: http://www.oracle.com/technetwork/java/jav Replace the `local_policy.jar` present in `JAVA_HOME/jre/lib/security/` with the new `local_policy.jar` from the downloaded package and restart the Solr node. ==== -[[KerberosAuthenticationPlugin-UsingDelegationTokens]] === Using Delegation Tokens The Kerberos plugin can be configured to use delegation tokens, which allow an application to reuse the authentication of an end-user or another application. @@ -304,7 +291,6 @@ The ZooKeeper path where the secret provider information is stored. This is in t `solr.kerberos.delegation.token.secret.manager.znode.working.path`:: The ZooKeeper path where token information is stored. This is in the form of the path + /security/zkdtsm. The path can include the chroot or the chroot can be omitted if you are not using it. This example includes the chroot: `server1:9983,server2:9983,server3:9983/solr/security/zkdtsm`. -[[KerberosAuthenticationPlugin-StartSolr]] === Start Solr Once the configuration is complete, you can start Solr with the `bin/solr` script, as in the example below, which is for users in SolrCloud mode only. This example assumes you modified `bin/solr.in.sh` or `bin/solr.in.cmd`, with the proper values, but if you did not, you would pass the system parameters along with the start command. Note you also need to customize the `-z` property as appropriate for the location of your ZooKeeper nodes. @@ -314,7 +300,6 @@ Once the configuration is complete, you can start Solr with the `bin/solr` scrip bin/solr -c -z server1:2181,server2:2181,server3:2181/solr ---- -[[KerberosAuthenticationPlugin-TesttheConfiguration]] === Test the Configuration . Do a `kinit` with your username. For example, `kinit \user@EXAMPLE.COM`. @@ -325,7 +310,6 @@ bin/solr -c -z server1:2181,server2:2181,server3:2181/solr curl --negotiate -u : "http://192.168.0.107:8983/solr/" ---- -[[KerberosAuthenticationPlugin-UsingSolrJwithaKerberizedSolr]] == Using SolrJ with a Kerberized Solr To use Kerberos authentication in a SolrJ application, you need the following two lines before you create a SolrClient: @@ -353,7 +337,6 @@ SolrJClient { }; ---- -[[KerberosAuthenticationPlugin-DelegationTokenswithSolrJ]] === Delegation Tokens with SolrJ Delegation tokens are also supported with SolrJ, in the following ways: diff --git a/solr/solr-ref-guide/src/language-analysis.adoc b/solr/solr-ref-guide/src/language-analysis.adoc index c2b02ff069a..a6d04da6b41 100644 --- a/solr/solr-ref-guide/src/language-analysis.adoc +++ b/solr/solr-ref-guide/src/language-analysis.adoc @@ -26,7 +26,6 @@ In other languages the tokenization rules are often not so simple. Some European For information about language detection at index time, see <>. -[[LanguageAnalysis-KeywordMarkerFilterFactory]] == KeywordMarkerFilterFactory Protects words from being modified by stemmers. A customized protected word list may be specified with the "protected" attribute in the schema. Any words in the protected word list will not be modified by any stemmer in Solr. @@ -44,7 +43,6 @@ A sample Solr `protwords.txt` with comments can be found in the `sample_techprod ---- -[[LanguageAnalysis-KeywordRepeatFilterFactory]] == KeywordRepeatFilterFactory Emits each token twice, one with the `KEYWORD` attribute and once without. @@ -69,8 +67,6 @@ A sample fieldType configuration could look like this: IMPORTANT: When adding the same token twice, it will also score twice (double), so you may have to re-tune your ranking rules. 
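The sample field type referred to just above is not visible in this hunk. As one possible sketch, a chain like the one this section describes could be registered through the Schema API; the collection name, field type name, and the specific stemmer used here are illustrative assumptions only:

[source,bash]
----
# Define a text type that keeps the original (KEYWORD) token alongside the stemmed one,
# and removes exact duplicates where the stemmer left a token unchanged
curl -X POST -H 'Content-Type: application/json' \
  'http://localhost:8983/solr/mycollection/schema' -d '{
  "add-field-type": {
    "name": "text_keyword_repeat",
    "class": "solr.TextField",
    "analyzer": {
      "tokenizer": { "class": "solr.WhitespaceTokenizerFactory" },
      "filters": [
        { "class": "solr.KeywordRepeatFilterFactory" },
        { "class": "solr.PorterStemFilterFactory" },
        { "class": "solr.RemoveDuplicatesTokenFilterFactory" }
      ]
    }
  }
}'
----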
- -[[LanguageAnalysis-StemmerOverrideFilterFactory]] == StemmerOverrideFilterFactory Overrides stemming algorithms by applying a custom mapping, then protecting these terms from being modified by stemmers. @@ -90,7 +86,6 @@ A sample http://svn.apache.org/repos/asf/lucene/dev/trunk/solr/core/src/test-fil ---- -[[LanguageAnalysis-DictionaryCompoundWordTokenFilter]] == Dictionary Compound Word Token Filter This filter splits, or _decompounds_, compound words into individual words using a dictionary of the component words. Each input token is passed through unchanged. If it can also be decompounded into subwords, each subword is also added to the stream at the same logical position. @@ -129,7 +124,6 @@ Assume that `germanwords.txt` contains at least the following words: `dumm kopf *Out:* "Donaudampfschiff"(1), "Donau"(1), "dampf"(1), "schiff"(1), "dummkopf"(2), "dumm"(2), "kopf"(2) -[[LanguageAnalysis-UnicodeCollation]] == Unicode Collation Unicode Collation is a language-sensitive method of sorting text that can also be used for advanced search purposes. @@ -175,7 +169,6 @@ Expert options: `variableTop`:: Single character or contraction. Controls what is variable for `alternate`. -[[LanguageAnalysis-SortingTextforaSpecificLanguage]] === Sorting Text for a Specific Language In this example, text is sorted according to the default German rules provided by ICU4J. @@ -223,7 +216,6 @@ An example using the "city_sort" field to sort: q=*:*&fl=city&sort=city_sort+asc ---- -[[LanguageAnalysis-SortingTextforMultipleLanguages]] === Sorting Text for Multiple Languages There are two approaches to supporting multiple languages: if there is a small list of languages you wish to support, consider defining collated fields for each language and using `copyField`. However, adding a large number of sort fields can increase disk and indexing costs. An alternative approach is to use the Unicode `default` collator. @@ -237,7 +229,6 @@ The Unicode `default` or `ROOT` locale has rules that are designed to work well strength="primary" /> ---- -[[LanguageAnalysis-SortingTextwithCustomRules]] === Sorting Text with Custom Rules You can define your own set of sorting rules. It's easiest to take existing rules that are close to what you want and customize them. @@ -277,7 +268,6 @@ This rule set can now be used for custom collation in Solr: strength="primary" /> ---- -[[LanguageAnalysis-JDKCollation]] === JDK Collation As mentioned above, ICU Unicode Collation is better in several ways than JDK Collation, but if you cannot use ICU4J for some reason, you can use `solr.CollationField`. @@ -321,7 +311,6 @@ Using a Tailored ruleset: == ASCII & Decimal Folding Filters -[[LanguageAnalysis-AsciiFolding]] === ASCII Folding This filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if one exists. Only those characters with reasonable ASCII alternatives are converted. @@ -348,7 +337,6 @@ This can increase recall by causing more matches. On the other hand, it can redu *Out:* "Bjorn", "Angstrom" -[[LanguageAnalysis-DecimalDigitFolding]] === Decimal Digit Folding This filter converts any character in the Unicode "Decimal Number" general category (`Nd`) into their equivalent Basic Latin digits (0-9). @@ -369,7 +357,6 @@ This can increase recall by causing more matches. 
On the other hand, it can redu ---- -[[LanguageAnalysis-Language-SpecificFactories]] == Language-Specific Factories These factories are each designed to work with specific languages. The languages covered here are: @@ -380,8 +367,8 @@ These factories are each designed to work with specific languages. The languages * <> * <> * <> -* <> -* <> +* <> +* <> * <> * <> @@ -389,7 +376,7 @@ These factories are each designed to work with specific languages. The languages * <> * <> * <> -* <> +* <> * <> * <> * <> @@ -410,7 +397,6 @@ These factories are each designed to work with specific languages. The languages * <> * <> -[[LanguageAnalysis-Arabic]] === Arabic Solr provides support for the http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf[Light-10] (PDF) stemming algorithm, and Lucene includes an example stopword list. @@ -432,7 +418,6 @@ This algorithm defines both character normalization and stemming, so these are s ---- -[[LanguageAnalysis-BrazilianPortuguese]] === Brazilian Portuguese This is a Java filter written specifically for stemming the Brazilian dialect of the Portuguese language. It uses the Lucene class `org.apache.lucene.analysis.br.BrazilianStemmer`. Although that stemmer can be configured to use a list of protected words (which should not be stemmed), this factory does not accept any arguments to specify such a list. @@ -457,7 +442,6 @@ This is a Java filter written specifically for stemming the Brazilian dialect of *Out:* "pra", "pra" -[[LanguageAnalysis-Bulgarian]] === Bulgarian Solr includes a light stemmer for Bulgarian, following http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf[this algorithm] (PDF), and Lucene includes an example stopword list. @@ -477,7 +461,6 @@ Solr includes a light stemmer for Bulgarian, following http://members.unine.ch/j ---- -[[LanguageAnalysis-Catalan]] === Catalan Solr can stem Catalan using the Snowball Porter Stemmer with an argument of `language="Catalan"`. Solr includes a set of contractions for Catalan, which can be stripped using `solr.ElisionFilterFactory`. @@ -507,14 +490,13 @@ Solr can stem Catalan using the Snowball Porter Stemmer with an argument of `lan *Out:* "llengu"(1), "llengu"(2) -[[LanguageAnalysis-TraditionalChinese]] === Traditional Chinese -The default configuration of the <> is suitable for Traditional Chinese text. It follows the Word Break rules from the Unicode Text Segmentation algorithm for non-Chinese text, and uses a dictionary to segment Chinese words. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. +The default configuration of the <> is suitable for Traditional Chinese text. It follows the Word Break rules from the Unicode Text Segmentation algorithm for non-Chinese text, and uses a dictionary to segment Chinese words. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. -<> can also be used to tokenize Traditional Chinese text. Following the Word Break rules from the Unicode Text Segmentation algorithm, it produces one token per Chinese character. When combined with <>, overlapping bigrams of Chinese characters are formed. +<> can also be used to tokenize Traditional Chinese text. 
Following the Word Break rules from the Unicode Text Segmentation algorithm, it produces one token per Chinese character. When combined with <>, overlapping bigrams of Chinese characters are formed. -<> folds fullwidth ASCII variants into the equivalent Basic Latin forms. +<> folds fullwidth ASCII variants into the equivalent Basic Latin forms. *Examples:* @@ -537,10 +519,9 @@ The default configuration of the < ---- -[[LanguageAnalysis-CJKBigramFilter]] === CJK Bigram Filter -Forms bigrams (overlapping 2-character sequences) of CJK characters that are generated from <> or <>. +Forms bigrams (overlapping 2-character sequences) of CJK characters that are generated from <> or <>. By default, all CJK characters produce bigrams, but finer grained control is available by specifying orthographic type arguments `han`, `hiragana`, `katakana`, and `hangul`. When set to `false`, characters of the corresponding type will be passed through as unigrams, and will not be included in any bigrams. @@ -560,18 +541,17 @@ In all cases, all non-CJK input is passed through unmodified. `outputUnigrams`:: (true/false) If true, in addition to forming bigrams, all characters are also passed through as unigrams. Default is false. -See the example under <>. +See the example under <>. -[[LanguageAnalysis-SimplifiedChinese]] === Simplified Chinese -For Simplified Chinese, Solr provides support for Chinese sentence and word segmentation with the <>. This component includes a large dictionary and segments Chinese text into words with the Hidden Markov Model. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. +For Simplified Chinese, Solr provides support for Chinese sentence and word segmentation with the <>. This component includes a large dictionary and segments Chinese text into words with the Hidden Markov Model. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. -The default configuration of the <> is also suitable for Simplified Chinese text. It follows the Word Break rules from the Unicode Text Segmentation algorithm for non-Chinese text, and uses a dictionary to segment Chinese words. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. +The default configuration of the <> is also suitable for Simplified Chinese text. It follows the Word Break rules from the Unicode Text Segmentation algorithm for non-Chinese text, and uses a dictionary to segment Chinese words. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. Also useful for Chinese analysis: -<> folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. +<> folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. 
*Examples:* @@ -598,7 +578,6 @@ Also useful for Chinese analysis: ---- -[[LanguageAnalysis-HMMChineseTokenizer]] === HMM Chinese Tokenizer For Simplified Chinese, Solr provides support for Chinese sentence and word segmentation with the `solr.HMMChineseTokenizerFactory` in the `analysis-extras` contrib module. This component includes a large dictionary and segments Chinese text into words with the Hidden Markov Model. To use this tokenizer, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. @@ -613,9 +592,8 @@ To use the default setup with fallback to English Porter stemmer for English wor `` -Or to configure your own analysis setup, use the `solr.HMMChineseTokenizerFactory` along with your custom filter setup. See an example of this in the <> section. +Or to configure your own analysis setup, use the `solr.HMMChineseTokenizerFactory` along with your custom filter setup. See an example of this in the <> section. -[[LanguageAnalysis-Czech]] === Czech Solr includes a light stemmer for Czech, following https://dl.acm.org/citation.cfm?id=1598600[this algorithm], and Lucene includes an example stopword list. @@ -641,12 +619,11 @@ Solr includes a light stemmer for Czech, following https://dl.acm.org/citation.c *Out:* "preziden", "preziden", "preziden" -[[LanguageAnalysis-Danish]] === Danish Solr can stem Danish using the Snowball Porter Stemmer with an argument of `language="Danish"`. -Also relevant are the <>. +Also relevant are the <>. *Factory class:* `solr.SnowballPorterFilterFactory` @@ -671,8 +648,6 @@ Also relevant are the < ---- -[[LanguageAnalysis-Hindi]] === Hindi Solr includes support for stemming Hindi following http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf[this algorithm] (PDF), support for common spelling differences through the `solr.HindiNormalizationFilterFactory`, support for encoding differences through the `solr.IndicNormalizationFilterFactory` following http://ldc.upenn.edu/myl/IndianScriptsUnicode.html[this algorithm], and Lucene includes an example stopword list. @@ -914,8 +879,6 @@ Solr includes support for stemming Hindi following http://computing.open.ac.uk/S ---- - -[[LanguageAnalysis-Indonesian]] === Indonesian Solr includes support for stemming Indonesian (Bahasa Indonesia) following http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf[this algorithm] (PDF), and Lucene includes an example stopword list. @@ -941,7 +904,6 @@ Solr includes support for stemming Indonesian (Bahasa Indonesia) following http: *Out:* "bagai", "bagai" -[[LanguageAnalysis-Italian]] === Italian Solr includes two stemmers for Italian: one in the `solr.SnowballPorterFilterFactory language="Italian"`, and a lighter stemmer called `solr.ItalianLightStemFilterFactory`. Lucene includes an example stopword list. @@ -969,7 +931,6 @@ Solr includes two stemmers for Italian: one in the `solr.SnowballPorterFilterFac *Out:* "propag", "propag", "propag" -[[LanguageAnalysis-Irish]] === Irish Solr can stem Irish using the Snowball Porter Stemmer with an argument of `language="Irish"`. Solr includes `solr.IrishLowerCaseFilterFactory`, which can handle Irish-specific constructs. Solr also includes a set of contractions for Irish which can be stripped using `solr.ElisionFilterFactory`. 
@@ -999,22 +960,20 @@ Solr can stem Irish using the Snowball Porter Stemmer with an argument of `langu *Out:* "siopadóir", "síceapaite", "fearr", "athair" -[[LanguageAnalysis-Japanese]] === Japanese Solr includes support for analyzing Japanese, via the Lucene Kuromoji morphological analyzer, which includes several analysis components - more details on each below: -* <> normalizes Japanese horizontal iteration marks (odoriji) to their expanded form. -* <> tokenizes Japanese using morphological analysis, and annotates each term with part-of-speech, base form (a.k.a. lemma), reading and pronunciation. -* <> replaces original terms with their base forms (a.k.a. lemmas). -* <> removes terms that have one of the configured parts-of-speech. -* <> normalizes common katakana spelling variations ending in a long sound character (U+30FC) by removing the long sound character. +* <> normalizes Japanese horizontal iteration marks (odoriji) to their expanded form. +* <> tokenizes Japanese using morphological analysis, and annotates each term with part-of-speech, base form (a.k.a. lemma), reading and pronunciation. +* <> replaces original terms with their base forms (a.k.a. lemmas). +* <> removes terms that have one of the configured parts-of-speech. +* <> normalizes common katakana spelling variations ending in a long sound character (U+30FC) by removing the long sound character. Also useful for Japanese analysis, from lucene-analyzers-common: -* <> folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. +* <> folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. -[[LanguageAnalysis-JapaneseIterationMarkCharFilter]] ==== Japanese Iteration Mark CharFilter Normalizes horizontal Japanese iteration marks (odoriji) to their expanded form. Vertical iteration marks are not supported. @@ -1027,7 +986,6 @@ Normalizes horizontal Japanese iteration marks (odoriji) to their expanded form. `normalizeKana`:: set to `false` to not normalize kana iteration marks (default is `true`) -[[LanguageAnalysis-JapaneseTokenizer]] ==== Japanese Tokenizer Tokenizer for Japanese that uses morphological analysis, and annotates each term with part-of-speech, base form (a.k.a. lemma), reading and pronunciation. @@ -1052,7 +1010,6 @@ For some applications it might be good to use `search` mode for indexing and `no `discardPunctuation`:: set to `false` to keep punctuation, `true` to discard (the default) -[[LanguageAnalysis-JapaneseBaseFormFilter]] ==== Japanese Base Form Filter Replaces original terms' text with the corresponding base form (lemma). (`JapaneseTokenizer` annotates each term with its base form.) @@ -1061,7 +1018,6 @@ Replaces original terms' text with the corresponding base form (lemma). (`Japane (no arguments) -[[LanguageAnalysis-JapanesePartOfSpeechStopFilter]] ==== Japanese Part Of Speech Stop Filter Removes terms with one of the configured parts-of-speech. `JapaneseTokenizer` annotates terms with parts-of-speech. @@ -1074,12 +1030,11 @@ Removes terms with one of the configured parts-of-speech. `JapaneseTokenizer` an `enablePositionIncrements`:: if `luceneMatchVersion` is `4.3` or earlier and `enablePositionIncrements="false"`, no position holes will be left by this filter when it removes tokens. 
*This argument is invalid if `luceneMatchVersion` is `5.0` or later.* -[[LanguageAnalysis-JapaneseKatakanaStemFilter]] ==== Japanese Katakana Stem Filter Normalizes common katakana spelling variations ending in a long sound character (U+30FC) by removing the long sound character. -<> should be specified prior to this filter to normalize half-width katakana to full-width. +<> should be specified prior to this filter to normalize half-width katakana to full-width. *Factory class:* `JapaneseKatakanaStemFilterFactory` @@ -1087,7 +1042,6 @@ Normalizes common katakana spelling variations ending in a long sound character `minimumLength`:: terms below this length will not be stemmed. Default is 4, value must be 2 or more. -[[LanguageAnalysis-CJKWidthFilter]] ==== CJK Width Filter Folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. @@ -1115,14 +1069,13 @@ Example: ---- -[[LanguageAnalysis-Hebrew_Lao_Myanmar_Khmer]] +[[hebrew-lao-myanmar-khmer]] === Hebrew, Lao, Myanmar, Khmer Lucene provides support, in addition to UAX#29 word break rules, for Hebrew's use of the double and single quote characters, and for segmenting Lao, Myanmar, and Khmer into syllables with the `solr.ICUTokenizerFactory` in the `analysis-extras` contrib module. To use this tokenizer, see `solr/contrib/analysis-extras/README.txt for` instructions on which jars you need to add to your `solr_home/lib`. -See <> for more information. +See <> for more information. -[[LanguageAnalysis-Latvian]] === Latvian Solr includes support for stemming Latvian, and Lucene includes an example stopword list. @@ -1150,16 +1103,14 @@ Solr includes support for stemming Latvian, and Lucene includes an example stopw *Out:* "tirg", "tirg" -[[LanguageAnalysis-Norwegian]] === Norwegian Solr includes two classes for stemming Norwegian, `NorwegianLightStemFilterFactory` and `NorwegianMinimalStemFilterFactory`. Lucene includes an example stopword list. Another option is to use the Snowball Porter Stemmer with an argument of language="Norwegian". -Also relevant are the <>. +Also relevant are the <>. -[[LanguageAnalysis-NorwegianLightStemmer]] ==== Norwegian Light Stemmer The `NorwegianLightStemFilterFactory` requires a "two-pass" sort for the -dom and -het endings. This means that in the first pass the word "kristendom" is stemmed to "kristen", and then all the general rules apply so it will be further stemmed to "krist". The effect of this is that "kristen," "kristendom," "kristendommen," and "kristendommens" will all be stemmed to "krist." @@ -1209,7 +1160,6 @@ The second pass is to pick up -dom and -het endings. Consider this example: *Out:* "forelske" -[[LanguageAnalysis-NorwegianMinimalStemmer]] ==== Norwegian Minimal Stemmer The `NorwegianMinimalStemFilterFactory` stems plural forms of Norwegian nouns only. @@ -1244,10 +1194,8 @@ The `NorwegianMinimalStemFilterFactory` stems plural forms of Norwegian nouns on *Out:* "bil" -[[LanguageAnalysis-Persian]] === Persian -[[LanguageAnalysis-PersianFilterFactories]] ==== Persian Filter Factories Solr includes support for normalizing Persian, and Lucene includes an example stopword list. @@ -1267,7 +1215,6 @@ Solr includes support for normalizing Persian, and Lucene includes an example st ---- -[[LanguageAnalysis-Polish]] === Polish Solr provides support for Polish stemming with the `solr.StempelPolishStemFilterFactory`, and `solr.MorphologikFilterFactory` for lemmatization, in the `contrib/analysis-extras` module. 
The `solr.StempelPolishStemFilterFactory` component includes an algorithmic stemmer with tables for Polish. To use either of these filters, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. @@ -1308,7 +1255,6 @@ Note the lower case filter is applied _after_ the Morfologik stemmer; this is be The Morfologik dictionary parameter value is a constant specifying which dictionary to choose. The dictionary resource must be named `path/to/_language_.dict` and have an associated `.info` metadata file. See http://morfologik.blogspot.com/[the Morfologik project] for details. If the dictionary attribute is not provided, the Polish dictionary is loaded and used by default. -[[LanguageAnalysis-Portuguese]] === Portuguese Solr includes four stemmers for Portuguese: one in the `solr.SnowballPorterFilterFactory`, an alternative stemmer called `solr.PortugueseStemFilterFactory`, a lighter stemmer called `solr.PortugueseLightStemFilterFactory`, and an even less aggressive stemmer called `solr.PortugueseMinimalStemFilterFactory`. Lucene includes an example stopword list. @@ -1352,8 +1298,6 @@ Solr includes four stemmers for Portuguese: one in the `solr.SnowballPorterFilte *Out:* "pra", "pra" - -[[LanguageAnalysis-Romanian]] === Romanian Solr can stem Romanian using the Snowball Porter Stemmer with an argument of `language="Romanian"`. @@ -1375,11 +1319,8 @@ Solr can stem Romanian using the Snowball Porter Stemmer with an argument of `la ---- - -[[LanguageAnalysis-Russian]] === Russian -[[LanguageAnalysis-RussianStemFilter]] ==== Russian Stem Filter Solr includes two stemmers for Russian: one in the `solr.SnowballPorterFilterFactory language="Russian"`, and a lighter stemmer called `solr.RussianLightStemFilterFactory`. Lucene includes an example stopword list. @@ -1399,11 +1340,9 @@ Solr includes two stemmers for Russian: one in the `solr.SnowballPorterFilterFac ---- - -[[LanguageAnalysis-Scandinavian]] === Scandinavian -Scandinavian is a language group spanning three languages <>, <> and <> which are very similar. +Scandinavian is a language group spanning three languages <>, <> and <> which are very similar. Swedish å, ä, ö are in fact the same letters as Norwegian and Danish å, æ, ø and thus interchangeable when used between these languages. They are however folded differently when people type them on a keyboard lacking these characters. @@ -1413,7 +1352,6 @@ There are two filters for helping with normalization between Scandinavian langua See also each language section for other relevant filters. -[[LanguageAnalysis-ScandinavianNormalizationFilter]] ==== Scandinavian Normalization Filter This filter normalize use of the interchangeable Scandinavian characters æÆäÄöÖøØ and folded variants (aa, ao, ae, oe and oo) by transforming them to åÅæÆøØ. @@ -1441,7 +1379,6 @@ It's a semantically less destructive solution than `ScandinavianFoldingFilter`, *Out:* "blåbærsyltetøj", "blåbærsyltetøj", "blåbærsyltetøj", "blabarsyltetoj" -[[LanguageAnalysis-ScandinavianFoldingFilter]] ==== Scandinavian Folding Filter This filter folds Scandinavian characters åÅäæÄÆ\->a and öÖøØ\->o. It also discriminate against use of double vowels aa, ae, ao, oe and oo, leaving just the first one. 
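As an illustrative sketch only (the field type name below is invented for this example, and the guide's own example for this filter may differ), the folding filter is typically applied after lower casing:

[source,xml]
----
<fieldType name="text_scandinavian_fold" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- lowercase first so that both upper- and lower-case variants are folded -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- fold å/ä/æ to a and ö/ø to o, keeping only the first of double vowels aa, ae, ao, oe, oo -->
    <filter class="solr.ScandinavianFoldingFilterFactory"/>
  </analyzer>
</fieldType>
----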
@@ -1469,10 +1406,8 @@ It's a semantically more destructive solution than `ScandinavianNormalizationFil *Out:* "blabarsyltetoj", "blabarsyltetoj", "blabarsyltetoj", "blabarsyltetoj" -[[LanguageAnalysis-Serbian]] === Serbian -[[LanguageAnalysis-SerbianNormalizationFilter]] ==== Serbian Normalization Filter Solr includes a filter that normalizes Serbian Cyrillic and Latin characters. Note that this filter only works with lowercased input. @@ -1499,7 +1434,6 @@ See the Solr wiki for tips & advice on using this filter: https://wiki.apache.or ---- -[[LanguageAnalysis-Spanish]] === Spanish Solr includes two stemmers for Spanish: one in the `solr.SnowballPorterFilterFactory language="Spanish"`, and a lighter stemmer called `solr.SpanishLightStemFilterFactory`. Lucene includes an example stopword list. @@ -1526,15 +1460,13 @@ Solr includes two stemmers for Spanish: one in the `solr.SnowballPorterFilterFac *Out:* "tor", "tor", "tor" -[[LanguageAnalysis-Swedish]] === Swedish -[[LanguageAnalysis-SwedishStemFilter]] ==== Swedish Stem Filter Solr includes two stemmers for Swedish: one in the `solr.SnowballPorterFilterFactory language="Swedish"`, and a lighter stemmer called `solr.SwedishLightStemFilterFactory`. Lucene includes an example stopword list. -Also relevant are the <>. +Also relevant are the <>. *Factory class:* `solr.SwedishStemFilterFactory` @@ -1557,8 +1489,6 @@ Also relevant are the < ---- -[[LanguageAnalysis-Turkish]] === Turkish Solr includes support for stemming Turkish with the `solr.SnowballPorterFilterFactory`; support for case-insensitive search with the `solr.TurkishLowerCaseFilterFactory`; support for stripping apostrophes and following suffixes with `solr.ApostropheFilterFactory` (see http://www.ipcsit.com/vol57/015-ICNI2012-M021.pdf[Role of Apostrophes in Turkish Information Retrieval]); support for a form of stemming that truncating tokens at a configurable maximum length through the `solr.TruncateTokenFilterFactory` (see http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf[Information Retrieval on Turkish Texts]); and Lucene includes an example stopword list. @@ -1613,10 +1542,6 @@ Solr includes support for stemming Turkish with the `solr.SnowballPorterFilterFa ---- -[[LanguageAnalysis-BacktoTop#main]] -=== - -[[LanguageAnalysis-Ukrainian]] === Ukrainian Solr provides support for Ukrainian lemmatization with the `solr.MorphologikFilterFactory`, in the `contrib/analysis-extras` module. To use this filter, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. diff --git a/solr/solr-ref-guide/src/learning-to-rank.adoc b/solr/solr-ref-guide/src/learning-to-rank.adoc index 64a461bd6e4..d2687c124e7 100644 --- a/solr/solr-ref-guide/src/learning-to-rank.adoc +++ b/solr/solr-ref-guide/src/learning-to-rank.adoc @@ -22,21 +22,17 @@ With the *Learning To Rank* (or *LTR* for short) contrib module you can configur The module also supports feature extraction inside Solr. The only thing you need to do outside Solr is train your own ranking model. -[[LearningToRank-Concepts]] -== Concepts +== Learning to Rank Concepts -[[LearningToRank-Re-Ranking]] === Re-Ranking -Re-Ranking allows you to run a simple query for matching documents and then re-rank the top N documents using the scores from a different, complex query. This page describes the use of *LTR* complex queries, information on other rank queries included in the Solr distribution can be found on the <> page. 
+Re-Ranking allows you to run a simple query for matching documents and then re-rank the top N documents using the scores from a different, more complex query. This page describes the use of *LTR* complex queries, information on other rank queries included in the Solr distribution can be found on the <> page. -[[LearningToRank-LearningToRank]] -=== Learning To Rank +=== Learning To Rank Models In information retrieval systems, https://en.wikipedia.org/wiki/Learning_to_rank[Learning to Rank] is used to re-rank the top N retrieved documents using trained machine learning models. The hope is that such sophisticated models can make more nuanced ranking decisions than standard ranking functions like https://en.wikipedia.org/wiki/Tf%E2%80%93idf[TF-IDF] or https://en.wikipedia.org/wiki/Okapi_BM25[BM25]. -[[LearningToRank-Model]] -==== Model +==== Ranking Model A ranking model computes the scores used to rerank documents. Irrespective of any particular algorithm or implementation, a ranking model's computation can use three types of inputs: @@ -44,27 +40,23 @@ A ranking model computes the scores used to rerank documents. Irrespective of an * features that represent the document being scored * features that represent the query for which the document is being scored -[[LearningToRank-Feature]] ==== Feature A feature is a value, a number, that represents some quantity or quality of the document being scored or of the query for which documents are being scored. For example documents often have a 'recency' quality and 'number of past purchases' might be a quantity that is passed to Solr as part of the search query. -[[LearningToRank-Normalizer]] ==== Normalizer Some ranking models expect features on a particular scale. A normalizer can be used to translate arbitrary feature values into normalized values e.g. on a 0..1 or 0..100 scale. -[[LearningToRank-Training]] -=== Training +=== Training Models -[[LearningToRank-Featureengineering]] -==== Feature engineering +==== Feature Engineering The LTR contrib module includes several feature classes as well as support for custom features. Each feature class's javadocs contain an example to illustrate use of that class. The process of https://en.wikipedia.org/wiki/Feature_engineering[feature engineering] itself is then entirely up to your domain expertise and creativity. [cols=",,,",options="header",] |=== -|Feature |Class |Example parameters |<> +|Feature |Class |Example parameters |<> |field length |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/FieldLengthFeature.html[FieldLengthFeature] |`{"field":"title"}` |not (yet) supported |field value |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/FieldValueFeature.html[FieldValueFeature] |`{"field":"hits"}` |not (yet) supported |original score |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/OriginalScoreFeature.html[OriginalScoreFeature] |`{}` |not applicable @@ -84,12 +76,10 @@ The LTR contrib module includes several feature classes as well as support for c |(custom) |(custom class extending {solr-javadocs}/solr-ltr/org/apache/solr/ltr/norm/Normalizer.html[Normalizer]) | |=== -[[LearningToRank-Featureextraction]] ==== Feature Extraction The ltr contrib module includes a <> transformer] to support the calculation and return of feature values for https://en.wikipedia.org/wiki/Feature_extraction[feature extraction] purposes including and especially when you do not yet have an actual reranking model. 
-[[LearningToRank-Featureselectionandmodeltraining]] ==== Feature Selection and Model Training Feature selection and model training take place offline and outside Solr. The ltr contrib module supports two generalized forms of models as well as custom models. Each model class's javadocs contain an example to illustrate configuration of that class. In the form of JSON files your trained model or models (e.g. different models for different customer geographies) can then be directly uploaded into Solr using provided REST APIs. @@ -102,8 +92,7 @@ Feature selection and model training take place offline and outside Solr. The lt |(custom) |(custom class extending {solr-javadocs}/solr-ltr/org/apache/solr/ltr/model/LTRScoringModel.html[LTRScoringModel]) |(not applicable) |=== -[[LearningToRank-QuickStartExample]] -== Quick Start Example +== Quick Start with LTR The `"techproducts"` example included with Solr is pre-configured with the plugins required for learning-to-rank, but they are disabled by default. @@ -114,7 +103,6 @@ To enable the plugins, please specify the `solr.ltr.enabled` JVM System Property bin/solr start -e techproducts -Dsolr.ltr.enabled=true ---- -[[LearningToRank-Uploadingfeatures]] === Uploading Features To upload features in a `/path/myFeatures.json` file, please run: @@ -154,7 +142,6 @@ To view the features you just uploaded please open the following URL in a browse ] ---- -[[LearningToRank-Extractingfeatures]] === Extracting Features To extract features as part of a query, add `[features]` to the `fl` parameter, for example: @@ -184,7 +171,6 @@ The output XML will include feature values as a comma-separated list, resembling }} ---- -[[LearningToRank-Uploadingamodel]] === Uploading a Model To upload the model in a `/path/myModel.json` file, please run: @@ -219,7 +205,6 @@ To view the model you just uploaded please open the following URL in a browser: } ---- -[[LearningToRank-Runningarerankquery]] === Running a Rerank Query To rerank the results of a query, add the `rq` parameter to your search, for example: @@ -258,12 +243,10 @@ The output XML will include feature values as a comma-separated list, resembling }} ---- -[[LearningToRank-ExternalFeatureInformation]] === External Feature Information The {solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/ValueFeature.html[ValueFeature] and {solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/SolrFeature.html[SolrFeature] classes support the use of external feature information, `efi` for short. -[[LearningToRank-Uploadingfeatures.1]] ==== Uploading Features To upload features in a `/path/myEfiFeatures.json` file, please run: @@ -308,9 +291,8 @@ To view the features you just uploaded please open the following URL in a browse ] ---- -As an aside, you may have noticed that the `myEfiFeatures.json` example uses `"store":"myEfiFeatureStore"` attributes: read more about feature `store` in the <> section of this page. +As an aside, you may have noticed that the `myEfiFeatures.json` example uses `"store":"myEfiFeatureStore"` attributes: read more about feature `store` in the <> section of this page. 
-[[LearningToRank-Extractingfeatures.1]] ==== Extracting Features To extract `myEfiFeatureStore` features as part of a query, add `efi.*` parameters to the `[features]` part of the `fl` parameter, for example: @@ -321,7 +303,6 @@ http://localhost:8983/solr/techproducts/query?q=test&fl=id,cat,manu,score,[featu [source,text] http://localhost:8983/solr/techproducts/query?q=test&fl=id,cat,manu,score,[features store=myEfiFeatureStore efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=0 efi.answer=13] -[[LearningToRank-Uploadingamodel.1]] ==== Uploading a Model To upload the model in a `/path/myEfiModel.json` file, please run: @@ -359,7 +340,6 @@ To view the model you just uploaded please open the following URL in a browser: } ---- -[[LearningToRank-Runningarerankquery.1]] ==== Running a Rerank Query To obtain the feature values computed during reranking, add `[features]` to the `fl` parameter and `efi.*` parameters to the `rq` parameter, for example: @@ -368,39 +348,34 @@ To obtain the feature values computed during reranking, add `[features]` to the http://localhost:8983/solr/techproducts/query?q=test&rq=\{!ltr model=myEfiModel efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=1}&fl=id,cat,manu,score,[features]] link:[] [source,text] -http://localhost:8983/solr/techproducts/query?q=test&rq=\{!ltr model=myEfiModel efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=0 efi.answer=13}&fl=id,cat,manu,score,[features]] +http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myEfiModel efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=0 efi.answer=13}&fl=id,cat,manu,score,[features] Notice the absence of `efi.*` parameters in the `[features]` part of the `fl` parameter. -[[LearningToRank-Extractingfeatureswhilstreranking]] ==== Extracting Features While Reranking To extract features for `myEfiFeatureStore` features while still reranking with `myModel`: [source,text] -http://localhost:8983/solr/techproducts/query?q=test&rq=\{!ltr model=myModel}&fl=id,cat,manu,score,[features store=myEfiFeatureStore efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=1]] link:[] +http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myModel}&fl=id,cat,manu,score,[features store=myEfiFeatureStore efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=1] -Notice the absence of `efi.*` parameters in the `rq` parameter (because `myModel` does not use `efi` feature) and the presence of `efi.*` parameters in the `[features]` part of the `fl` parameter (because `myEfiFeatureStore` contains `efi` features). +Notice the absence of `efi.\*` parameters in the `rq` parameter (because `myModel` does not use `efi` feature) and the presence of `efi.*` parameters in the `[features]` part of the `fl` parameter (because `myEfiFeatureStore` contains `efi` features). -Read more about model evolution in the <> section of this page. +Read more about model evolution in the <> section of this page. -[[LearningToRank-Trainingexample]] === Training Example Example training data and a demo 'train and upload model' script can be found in the `solr/contrib/ltr/example` folder in the https://git-wip-us.apache.org/repos/asf?p=lucene-solr.git[Apache lucene-solr git repository] which is mirrored on https://github.com/apache/lucene-solr/tree/releases/lucene-solr/6.4.0/solr/contrib/ltr/example[github.com] (the `solr/contrib/ltr/example` folder is not shipped in the solr binary release). 
-[[LearningToRank-Installation]] -== Installation +== Installation of LTR The ltr contrib module requires the `dist/solr-ltr-*.jar` JARs. -[[LearningToRank-Configuration]] -== Configuration +== LTR Configuration Learning-To-Rank is a contrib module and therefore its plugins must be configured in `solrconfig.xml`. -[[LearningToRank-Minimumrequirements]] -=== Minimum requirements +=== Minimum Requirements * Include the required contrib JARs. Note that by default paths are relative to the Solr core so they may need adjustments to your configuration, or an explicit specification of the `$solr.install.dir`. + @@ -437,15 +412,12 @@ Learning-To-Rank is a contrib module and therefore its plugins must be configure ---- -[[LearningToRank-Advancedoptions]] === Advanced Options -[[LearningToRank-LTRThreadModule]] ==== LTRThreadModule A thread module can be configured for the query parser and/or the transformer to parallelize the creation of feature weights. For details, please refer to the {solr-javadocs}/solr-ltr/org/apache/solr/ltr/LTRThreadModule.html[LTRThreadModule] javadocs. -[[LearningToRank-Featurevectorcustomization]] ==== Feature Vector Customization The features transformer returns dense CSV values such as `featureA=0.1,featureB=0.2,featureC=0.3,featureD=0.0`. @@ -462,7 +434,6 @@ For sparse CSV output such as `featureA:0.1 featureB:0.2 featureC:0.3` you can c ---- -[[LearningToRank-Implementationandcontributions]] ==== Implementation and Contributions .How does Solr Learning-To-Rank work under the hood? @@ -481,10 +452,8 @@ Contributions for further models, features and normalizers are welcome. Related * http://wiki.apache.org/lucene-java/HowToContribute ==== -[[LearningToRank-Lifecycle]] -== Lifecycle +== LTR Lifecycle -[[LearningToRank-Featurestores]] === Feature Stores It is recommended that you organise all your features into stores which are akin to namespaces: @@ -501,7 +470,6 @@ To inspect the content of the `commonFeatureStore` feature store: `\http://localhost:8983/solr/techproducts/schema/feature-store/commonFeatureStore` -[[LearningToRank-Models]] === Models * A model uses features from exactly one feature store. @@ -537,13 +505,11 @@ To delete the `currentFeatureStore` feature store: curl -XDELETE 'http://localhost:8983/solr/techproducts/schema/feature-store/currentFeatureStore' ---- -[[LearningToRank-Applyingchanges]] === Applying Changes The feature store and the model store are both <>. Changes made to managed resources are not applied to the active Solr components until the Solr collection (or Solr core in single server mode) is reloaded. -[[LearningToRank-Examples]] -=== Examples +=== LTR Examples ==== One Feature Store, Multiple Ranking Models @@ -628,7 +594,6 @@ The feature store and the model store are both <>. Using an active-passive model, a SolrCloud cluster can be replicated to another data center, and monitored with a new API. -=== Graph Query Parser +=== Graph QueryParser -A new <> makes it possible to to graph traversal queries of Directed (Cyclic) Graphs modelled using Solr documents. +A new <> makes it possible to to graph traversal queries of Directed (Cyclic) Graphs modelled using Solr documents. 
[[major-5-6-docvalues]] === DocValues diff --git a/solr/solr-ref-guide/src/making-and-restoring-backups.adoc b/solr/solr-ref-guide/src/making-and-restoring-backups.adoc index 6f3383c1b45..38da729051d 100644 --- a/solr/solr-ref-guide/src/making-and-restoring-backups.adoc +++ b/solr/solr-ref-guide/src/making-and-restoring-backups.adoc @@ -28,12 +28,12 @@ Support for backups when running SolrCloud is provided with the <>. -* `action=RESTORE`: This command restores Solr indexes and configurations. More information is available in the section <>. +* `action=BACKUP`: This command backs up Solr indexes and configurations. More information is available in the section <>. +* `action=RESTORE`: This command restores Solr indexes and configurations. More information is available in the section <>. == Standalone Mode Backups -Backups and restoration uses Solr's replication handler. Out of the box, Solr includes implicit support for replication so this API can be used. Configuration of the replication handler can, however, be customized by defining your own replication handler in `solrconfig.xml` . For details on configuring the replication handler, see the section <>. +Backups and restoration uses Solr's replication handler. Out of the box, Solr includes implicit support for replication so this API can be used. Configuration of the replication handler can, however, be customized by defining your own replication handler in `solrconfig.xml` . For details on configuring the replication handler, see the section <>. === Backup API @@ -58,7 +58,7 @@ The path where the backup will be created. If the path is not absolute then the |name |The snapshot will be created in a directory called `snapshot.`. If a name is not specified then the directory name would have the following format: `snapshot.`. `numberToKeep`:: -The number of backups to keep. If `maxNumberOfBackups` has been specified on the replication handler in `solrconfig.xml`, `maxNumberOfBackups` is always used and attempts to use `numberToKeep` will cause an error. Also, this parameter is not taken into consideration if the backup name is specified. More information about `maxNumberOfBackups` can be found in the section <>. +The number of backups to keep. If `maxNumberOfBackups` has been specified on the replication handler in `solrconfig.xml`, `maxNumberOfBackups` is always used and attempts to use `numberToKeep` will cause an error. Also, this parameter is not taken into consideration if the backup name is specified. More information about `maxNumberOfBackups` can be found in the section <>. `repository`:: The name of the repository to be used for the backup. If no repository is specified then the local filesystem repository will be used automatically. diff --git a/solr/solr-ref-guide/src/managed-resources.adoc b/solr/solr-ref-guide/src/managed-resources.adoc index 72b879a2548..deb10cc112e 100644 --- a/solr/solr-ref-guide/src/managed-resources.adoc +++ b/solr/solr-ref-guide/src/managed-resources.adoc @@ -33,15 +33,13 @@ All of the examples in this section assume you are running the "techproducts" So bin/solr -e techproducts ---- -[[ManagedResources-Overview]] -== Overview +== Managed Resources Overview Let's begin learning about managed resources by looking at a couple of examples provided by Solr for managing stop words and synonyms using a REST API. After reading this section, you'll be ready to dig into the details of how managed resources are implemented in Solr so you can start building your own implementation. 
-[[ManagedResources-Stopwords]] -=== Stop Words +=== Managing Stop Words -To begin, you need to define a field type that uses the <>, such as: +To begin, you need to define a field type that uses the <>, such as: [source,xml,subs="verbatim,callouts"] ---- @@ -56,7 +54,7 @@ To begin, you need to define a field type that uses the < The filter implementation class is `solr.ManagedStopFilterFactory`. This is a special implementation of the <> that uses a set of stop words that are managed from a REST API. +<1> The filter implementation class is `solr.ManagedStopFilterFactory`. This is a special implementation of the <> that uses a set of stop words that are managed from a REST API. <2> The `managed=”english”` attribute gives a name to the set of managed stop words, in this case indicating the stop words are for English text. @@ -134,8 +132,7 @@ curl -X DELETE "http://localhost:8983/solr/techproducts/schema/analysis/stopword NOTE: PUT/POST is used to add terms to an existing list instead of replacing the list entirely. This is because it is more common to add a term to an existing list than it is to replace a list altogether, so the API favors the more common approach of incrementally adding terms especially since deleting individual terms is also supported. -[[ManagedResources-Synonyms]] -=== Synonyms +=== Managing Synonyms For the most part, the API for managing synonyms behaves similar to the API for stop words, except instead of working with a list of words, it uses a map, where the value for each entry in the map is a set of synonyms for a term. As with stop words, the `sample_techproducts_configs` <> includes a pre-built set of synonym mappings suitable for the sample data that is activated by the following field type definition in schema.xml: @@ -209,8 +206,7 @@ Note that the expansion is performed when processing the PUT request so the unde Lastly, you can delete a mapping by sending a DELETE request to the managed endpoint. -[[ManagedResources-ApplyingChanges]] -== Applying Changes +== Applying Managed Resource Changes Changes made to managed resources via this REST API are not applied to the active Solr components until the Solr collection (or Solr core in single server mode) is reloaded. @@ -227,7 +223,6 @@ However, the intent of this API implementation is that changes will be applied u Changing things like stop words and synonym mappings typically require re-indexing existing documents if being used by index-time analyzers. The RestManager framework does not guard you from this, it simply makes it possible to programmatically build up a set of stop words, synonyms etc. ==== -[[ManagedResources-RestManagerEndpoint]] == RestManager Endpoint Metadata about registered ManagedResources is available using the `/schema/managed` endpoint for each collection. diff --git a/solr/solr-ref-guide/src/mbean-request-handler.adoc b/solr/solr-ref-guide/src/mbean-request-handler.adoc index eebd082e9b3..8a3b9182816 100644 --- a/solr/solr-ref-guide/src/mbean-request-handler.adoc +++ b/solr/solr-ref-guide/src/mbean-request-handler.adoc @@ -34,8 +34,7 @@ Specifies whether statistics are returned with results. You can override the `st `wt`:: The output format. This operates the same as the <>. The default is `xml`. 
-[[MBeanRequestHandler-Examples]] -== Examples +== MBeanRequestHandler Examples The following examples assume you are running Solr's `techproducts` example configuration: diff --git a/solr/solr-ref-guide/src/merging-indexes.adoc b/solr/solr-ref-guide/src/merging-indexes.adoc index 49afe4e3115..cf1cd372850 100644 --- a/solr/solr-ref-guide/src/merging-indexes.adoc +++ b/solr/solr-ref-guide/src/merging-indexes.adoc @@ -27,7 +27,6 @@ To merge indexes, they must meet these requirements: Optimally, the two indexes should be built using the same schema. -[[MergingIndexes-UsingIndexMergeTool]] == Using IndexMergeTool To merge the indexes, do the following: @@ -43,9 +42,8 @@ java -cp $SOLR/server/solr-webapp/webapp/WEB-INF/lib/lucene-core-VERSION.jar:$SO This will create a new index at `/path/to/newindex` that contains both index1 and index2. . Copy this new directory to the location of your application's solr index (move the old one aside first, of course) and start Solr. -[[MergingIndexes-UsingCoreAdmin]] == Using CoreAdmin -The `MERGEINDEXES` command of the <> can be used to merge indexes into a new core – either from one or more arbitrary `indexDir` directories or by merging from one or more existing `srcCore` core names. +The `MERGEINDEXES` command of the <> can be used to merge indexes into a new core – either from one or more arbitrary `indexDir` directories or by merging from one or more existing `srcCore` core names. -See the <> section for details. +See the <> section for details. diff --git a/solr/solr-ref-guide/src/morelikethis.adoc b/solr/solr-ref-guide/src/morelikethis.adoc index e0756cbbc55..a5bdb4f5f31 100644 --- a/solr/solr-ref-guide/src/morelikethis.adoc +++ b/solr/solr-ref-guide/src/morelikethis.adoc @@ -28,7 +28,6 @@ The second is to use it as a search component. This is less desirable since it p The final approach is to use it as a request handler but with externally supplied text. This case, also referred to as the MoreLikeThisHandler, will supply information about similar documents in the index based on the text of the input document. -[[MoreLikeThis-HowMoreLikeThisWorks]] == How MoreLikeThis Works `MoreLikeThis` constructs a Lucene query based on terms in a document. It does this by pulling terms from the defined list of fields ( see the `mlt.fl` parameter, below). For best results, the fields should have stored term vectors in `schema.xml`. For example: @@ -42,7 +41,6 @@ If term vectors are not stored, `MoreLikeThis` will generate terms from stored f The next phase filters terms from the original document using thresholds defined with the MoreLikeThis parameters. Finally, a query is run with these terms, and any other query parameters that have been defined (see the `mlt.qf` parameter, below) and a new document set is returned. -[[MoreLikeThis-CommonParametersforMoreLikeThis]] == Common Parameters for MoreLikeThis The table below summarizes the `MoreLikeThis` parameters supported by Lucene/Solr. These parameters can be used with any of the three possible MoreLikeThis approaches. @@ -77,8 +75,6 @@ Specifies if the query will be boosted by the interesting term relevance. It can `mlt.qf`:: Query fields and their boosts using the same format as that used by the <>. These fields must also be specified in `mlt.fl`. - -[[MoreLikeThis-ParametersfortheMoreLikeThisComponent]] == Parameters for the MoreLikeThisComponent Using MoreLikeThis as a search component returns similar documents for each document in the response set. 
In addition to the common parameters, these additional options are available: @@ -89,8 +85,6 @@ If set to `true`, activates the `MoreLikeThis` component and enables Solr to ret `mlt.count`:: Specifies the number of similar documents to be returned for each result. The default value is 5. - -[[MoreLikeThis-ParametersfortheMoreLikeThisHandler]] == Parameters for the MoreLikeThisHandler The table below summarizes parameters accessible through the `MoreLikeThisHandler`. It supports faceting, paging, and filtering using common query parameters, but does not work well with alternate query parsers. @@ -105,7 +99,6 @@ Specifies an offset into the main query search results to locate the document on Controls how the `MoreLikeThis` component presents the "interesting" terms (the top TF/IDF terms) for the query. Supports three settings. The setting list lists the terms. The setting none lists no terms. The setting details lists the terms along with the boost value used for each term. Unless `mlt.boost=true`, all terms will have `boost=1.0`. -[[MoreLikeThis-MoreLikeThisQueryParser]] -== More Like This Query Parser +== MoreLikeThis Query Parser The `mlt` query parser provides a mechanism to retrieve documents similar to a given document, like the handler. More information on the usage of the mlt query parser can be found in the section <>. diff --git a/solr/solr-ref-guide/src/near-real-time-searching.adoc b/solr/solr-ref-guide/src/near-real-time-searching.adoc index fccf7b3f9f1..6556398e200 100644 --- a/solr/solr-ref-guide/src/near-real-time-searching.adoc +++ b/solr/solr-ref-guide/src/near-real-time-searching.adoc @@ -26,7 +26,6 @@ With NRT, you can modify a `commit` command to be a *soft commit*, which avoids However, pay special attention to cache and autowarm settings as they can have a significant impact on NRT performance. -[[NearRealTimeSearching-CommitsandOptimizing]] == Commits and Optimizing A commit operation makes index changes visible to new search requests. A *hard commit* uses the transaction log to get the id of the latest document changes, and also calls `fsync` on the index files to ensure they have been flushed to stable storage and no data loss will result from a power failure. The current transaction log is closed and a new one is opened. See the "transaction log" discussion below for data loss issues. @@ -45,7 +44,6 @@ The number of milliseconds to wait before pushing documents to the index. It wor Use `maxDocs` and `maxTime` judiciously to fine-tune your commit strategies. -[[NearRealTimeSearching-TransactionLogs]] === Transaction Logs (tlogs) Transaction logs are a "rolling window" of at least the last `N` (default 100) documents indexed. Tlogs are configured in solrconfig.xml, including the value of `N`. The current transaction log is closed and a new one opened each time any variety of hard commit occurs. Soft commits have no effect on the transaction log. @@ -54,7 +52,6 @@ When tlogs are enabled, documents being added to the index are written to the tl When Solr is shut down gracefully (i.e. using the `bin/solr stop` command and the like) Solr will close the tlog file and index segments so no replay will be necessary on startup. -[[NearRealTimeSearching-AutoCommits]] === AutoCommits An autocommit also uses the parameters `maxDocs` and `maxTime`. However it's useful in many strategies to use both a hard `autocommit` and `autosoftcommit` to achieve more flexible commits. 
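As a hedged sketch of how such a combination might be configured in the `<updateHandler>` section of `solrconfig.xml` (the interval values below are placeholders, not recommendations):

[source,xml]
----
<updateHandler class="solr.DirectUpdateHandler2">
  <!-- hard commit: flush to stable storage regularly, but don't open a new searcher -->
  <autoCommit>
    <maxTime>15000</maxTime>
    <openSearcher>false</openSearcher>
  </autoCommit>
  <!-- soft commit: make newly indexed documents visible to searchers more often -->
  <autoSoftCommit>
    <maxTime>1000</maxTime>
  </autoSoftCommit>
</updateHandler>
----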
@@ -72,7 +69,6 @@ For example: It's better to use `maxTime` rather than `maxDocs` to modify an `autoSoftCommit`, especially when indexing a large number of documents through the commit operation. It's also better to turn off `autoSoftCommit` for bulk indexing. -[[NearRealTimeSearching-OptionalAttributesforcommitandoptimize]] === Optional Attributes for commit and optimize `waitSearcher`:: @@ -99,7 +95,6 @@ Example of `commit` and `optimize` with optional attributes: ---- -[[NearRealTimeSearching-PassingcommitandcommitWithinparametersaspartoftheURL]] === Passing commit and commitWithin Parameters as Part of the URL Update handlers can also get `commit`-related parameters as part of the update URL, if the `stream.body` feature is enabled. This example adds a small test document and causes an explicit commit to happen immediately afterwards: @@ -132,10 +127,9 @@ curl http://localhost:8983/solr/my_collection/update?commitWithin=10000 -H "Content-Type: text/xml" --data-binary 'testdoc' ---- -WARNING: While the `stream.body` feature is great for development and testing, it should normally not be enabled in production systems, as it lets a user with READ permissions post data that may alter the system state. The feature is disabled by default. See <> for details. +WARNING: While the `stream.body` feature is great for development and testing, it should normally not be enabled in production systems, as it lets a user with READ permissions post data that may alter the system state. The feature is disabled by default. See <> for details. -[[NearRealTimeSearching-ChangingdefaultcommitWithinBehavior]] -=== Changing default commitWithin Behavior +=== Changing Default commitWithin Behavior The `commitWithin` settings allow forcing document commits to happen in a defined time period. This is used most frequently with <>, and for that reason the default is to perform a soft commit. This does not, however, replicate new documents to slave servers in a master/slave environment. If that's a requirement for your implementation, you can force a hard commit by adding a parameter, as in this example: diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index 271c33b29c9..db484193775 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -24,7 +24,6 @@ This section details the other parsers, and gives examples for how they might be Many of these parsers are expressed the same way as <>. -[[OtherParsers-BlockJoinQueryParsers]] == Block Join Query Parsers There are two query parsers that support block joins. These parsers allow indexing and searching for relational content that has been<>. @@ -55,7 +54,6 @@ The example usage of the query parsers below assumes these two documents and eac ---- -[[OtherParsers-BlockJoinChildrenQueryParser]] === Block Join Children Query Parser This parser takes a query that matches some parent documents and returns their children. @@ -80,16 +78,16 @@ Using the example documents above, we can construct a query such as `q={!child o Note that the query for `someParents` should match only parent documents passed by `allParents` or you may get an exception: -.... +[literal] Parent query must not match any docs besides parent filter. Combine them as must (+) and must-not (-) clauses to find a problem doc. -.... + In older version the error is: -.... + +[literal] Parent query yields document which is not matched by parents filter. -.... 
+ You can search for `q=+(someParents) -(allParents)` to find a cause. -[[OtherParsers-BlockJoinParentQueryParser]] === Block Join Parent Query Parser This parser takes a query that matches child documents and returns their parents. @@ -101,13 +99,15 @@ The parameter `allParents` is a filter that matches *only parent documents*; her The parameter `someChildren` is a query that matches some or all of the child documents. Note that the query for `someChildren` should match only child documents or you may get an exception: -.... + +[literal] Child query must not match same docs with parent filter. Combine them as must clauses (+) to find a problem doc. -.... -In older version it's: -.... + +In older version the error is: + +[literal] child query must only match non-parent docs. -.... + You can search for `q=+(parentFilter) +(someChildren)` to find a cause . Again using the example documents above, we can construct a query such as `q={!parent which="content_type:parentDocument"}comments:SolrCloud`. We get this document in response: @@ -133,20 +133,17 @@ A common mistake is to try to filter parents with a `which` filter, as in this b Instead, you should use a sibling mandatory clause as a filter: `q= *+title:join* +{!parent which="*content_type:parentDocument*"}comments:SolrCloud` - ==== -[[OtherParsers-Scoring]] -=== Scoring +=== Scoring with the Block Join Parent Query Parser You can optionally use the `score` local parameter to return scores of the subordinate query. The values to use for this parameter define the type of aggregation, which are `avg` (average), `max` (maximum), `min` (minimum), `total (sum)`. Implicit default is `none` which returns `0.0`. -[[OtherParsers-BoostQueryParser]] == Boost Query Parser `BoostQParser` extends the `QParserPlugin` and creates a boosted query from the input value. The main value is the query to be boosted. Parameter `b` is the function query to use as the boost. The query to be boosted may be of any type. -Examples: +=== Boost Query Parser Examples Creates a query "foo" which is boosted (scores are multiplied) by the function query `log(popularity)`: @@ -162,7 +159,7 @@ Creates a query "foo" which is boosted by the date boosting function referenced {!boost b=recip(ms(NOW,mydatefield),3.16e-11,1,1)}foo ---- -[[OtherParsers-CollapsingQueryParser]] +[[other-collapsing]] == Collapsing Query Parser The `CollapsingQParser` is really a _post filter_ that provides more performant field collapsing than Solr's standard approach when the number of distinct groups in the result set is high. @@ -171,7 +168,6 @@ This parser collapses the result set to a single document per group before it fo Details about using the `CollapsingQParser` can be found in the section <>. -[[OtherParsers-ComplexPhraseQueryParser]] == Complex Phrase Query Parser The `ComplexPhraseQParser` provides support for wildcards, ORs, etc., inside phrase queries using Lucene's {lucene-javadocs}/queryparser/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.html[`ComplexPhraseQueryParser`]. @@ -204,15 +200,13 @@ A mix of ordered and unordered complex phrase queries: +_query_:"{!complexphrase inOrder=true}manu:\"a* c*\"" +_query_:"{!complexphrase inOrder=false df=name}\"bla* pla*\"" ---- -[[OtherParsers-Limitations]] -=== Limitations +=== Complex Phrase Parser Limitations Performance is sensitive to the number of unique terms that are associated with a pattern. 
For instance, searching for "a*" will form a large OR clause (technically a SpanOr with many terms) for all of the terms in your index for the indicated field that start with the single letter 'a'. It may be prudent to restrict wildcards to at least two or preferably three letters as a prefix. Allowing very short prefixes may result in to many low-quality documents being returned. Notice that it also supports leading wildcards "*a" as well with consequent performance implications. Applying <> in index-time analysis is usually a good idea. -[[OtherParsers-MaxBooleanClauses]] -==== MaxBooleanClauses +==== MaxBooleanClauses with Complex Phrase Parser You may need to increase MaxBooleanClauses in `solrconfig.xml` as a result of the term expansion above: @@ -221,10 +215,9 @@ You may need to increase MaxBooleanClauses in `solrconfig.xml` as a result of th 4096 ---- -This property is described in more detail in the section <>. +This property is described in more detail in the section <>. -[[OtherParsers-Stopwords]] -==== Stopwords +==== Stopwords with Complex Phrase Parser It is recommended not to use stopword elimination with this query parser. @@ -246,12 +239,10 @@ the document is returned. The next query that _does_ use the Complex Phrase Quer does _not_ return that document because SpanNearQuery has no good way to handle stopwords in a way analogous to PhraseQuery. If you must remove stopwords for your use case, use a custom filter factory or perhaps a customized synonyms filter that reduces given stopwords to some impossible token. -[[OtherParsers-Escaping]] -==== Escaping +==== Escaping with Complex Phrase Parser Special care has to be given when escaping: clauses between double quotes (usually whole query) is parsed twice, these parts have to be escaped as twice. eg `"foo\\: bar\\^"`. -[[OtherParsers-FieldQueryParser]] == Field Query Parser The `FieldQParser` extends the `QParserPlugin` and creates a field query from the input value, applying text analysis and constructing a phrase query if appropriate. The parameter `f` is the field to be queried. @@ -265,7 +256,6 @@ Example: This example creates a phrase query with "foo" followed by "bar" (assuming the analyzer for `myfield` is a text field with an analyzer that splits on whitespace and lowercase terms). This is generally equivalent to the Lucene query parser expression `myfield:"Foo Bar"`. -[[OtherParsers-FunctionQueryParser]] == Function Query Parser The `FunctionQParser` extends the `QParserPlugin` and creates a function query from the input value. This is only one way to use function queries in Solr; for another, more integrated, approach, see the section on <>. @@ -277,7 +267,6 @@ Example: {!func}log(foo) ---- -[[OtherParsers-FunctionRangeQueryParser]] == Function Range Query Parser The `FunctionRangeQParser` extends the `QParserPlugin` and creates a range query over a function. This is also referred to as `frange`, as seen in the examples below. @@ -312,15 +301,13 @@ Both of these examples restrict the results by a range of values found in a decl For more information about range queries over functions, see Yonik Seeley's introductory blog post https://lucidworks.com/2009/07/06/ranges-over-functions-in-solr-14/[Ranges over Functions in Solr 1.4]. -[[OtherParsers-GraphQueryParser]] == Graph Query Parser The `graph` query parser does a breadth first, cyclic aware, graph traversal of all documents that are "reachable" from a starting set of root documents identified by a wrapped query. 
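For instance, the first couple of node documents might be indexed along these lines (a hedged sketch only; the specific edge numbers assigned to each node here are illustrative, while the `in_edge`/`out_edge` field names match the `from`/`to` parameters used in the queries that follow):

[source,xml]
----
<add>
  <!-- node A: one outgoing edge, no incoming edges -->
  <doc>
    <field name="id">A</field>
    <field name="out_edge">1</field>
  </doc>
  <!-- node B: edge 1 arrives here, edge 2 leaves from here -->
  <doc>
    <field name="id">B</field>
    <field name="in_edge">1</field>
    <field name="out_edge">2</field>
  </doc>
</add>
----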
The graph is built according to linkages between documents based on the terms found in `from` and `to` fields that you specify as part of the query. -[[OtherParsers-Parameters]] -=== Parameters +=== Graph Query Parameters `to`:: The field name of matching documents to inspect to identify outgoing edges for graph traversal. Defaults to `edge_ids`. @@ -342,17 +329,15 @@ Boolean that indicates if the results of the query should be filtered so that on `useAutn`:: Boolean that indicates if an Automatons should be compiled for each iteration of the breadth first search, which may be faster for some graphs. Defaults to `false`. -[[OtherParsers-Limitations.1]] -=== Limitations +=== Graph Query Limitations The `graph` parser only works in single node Solr installations, or with <> collections that use exactly 1 shard. -[[OtherParsers-Examples]] -=== Examples +=== Graph Query Examples To understand how the graph parser works, consider the following Directed Cyclic Graph, containing 8 nodes (A to H) and 9 edges (1 to 9): -image::images/other-parsers/graph_qparser_example.png[image,height=200] +image::images/other-parsers/graph_qparser_example.png[image,height=100] One way to model this graph as Solr documents, would be to create one document per node, with mutivalued fields identifying the incoming and outgoing edges for each node: @@ -426,7 +411,6 @@ http://localhost:8983/solr/my_graph/query?fl=id&q={!graph+from=in_edge+to=out_ed } ---- -[[OtherParsers-SimplifiedModels]] === Simplified Models The Document & Field modeling used in the above examples enumerated all of the outgoing and income edges for each node explicitly, to help demonstrate exactly how the "from" and "to" params work, and to give you an idea of what is possible. With multiple sets of fields like these for identifying incoming and outgoing edges, it's possible to model many independent Directed Graphs that contain some or all of the documents in your collection. @@ -469,7 +453,6 @@ http://localhost:8983/solr/alt_graph/query?fl=id&q={!graph+from=id+to=out_edge+m } ---- -[[OtherParsers-JoinQueryParser]] == Join Query Parser `JoinQParser` extends the `QParserPlugin`. It allows normalizing relationships between documents with a join operation. This is different from the concept of a join in a relational database because no information is being truly joined. An appropriate SQL analogy would be an "inner query". @@ -493,8 +476,7 @@ fq = price:[* TO 12] The join operation is done on a term basis, so the "from" and "to" fields must use compatible field types. For example: joining between a `StrField` and a `TrieIntField` will not work, likewise joining between a `StrField` and a `TextField` that uses `LowerCaseFilterFactory` will only work for values that are already lower cased in the string field. -[[OtherParsers-Scoring.1]] -=== Scoring +=== Join Parser Scoring You can optionally use the `score` parameter to return scores of the subordinate query. The values to use for this parameter define the type of aggregation, which are `avg` (average), `max` (maximum), `min` (minimum) `total`, or `none`. @@ -504,7 +486,6 @@ You can optionally use the `score` parameter to return scores of the subordinate Specifying `score` local parameter switches the join algorithm. This might have performance implication on large indices, but it's more important that this algorithm won't work for single value numeric field starting from 7.0. Users are encouraged to change field types to string and rebuild indexes during migration. 
==== -[[OtherParsers-JoiningAcrossCollections]] === Joining Across Collections You can also specify a `fromIndex` parameter to join with a field from another core or collection. If running in SolrCloud mode, then the collection specified in the `fromIndex` parameter must have a single shard and a replica on all Solr nodes where the collection you're joining to has a replica. @@ -548,7 +529,6 @@ At query time, the `JoinQParser` will access the local replica of the *movie_dir For more information about join queries, see the Solr Wiki page on http://wiki.apache.org/solr/Join[Joins]. Erick Erickson has also written a blog post about join performance titled https://lucidworks.com/2012/06/20/solr-and-joins/[Solr and Joins]. -[[OtherParsers-LuceneQueryParser]] == Lucene Query Parser The `LuceneQParser` extends the `QParserPlugin` by parsing Solr's variant on the Lucene QueryParser syntax. This is effectively the same query parser that is used in Lucene. It uses the operators `q.op`, the default operator ("OR" or "AND") and `df`, the default field name. @@ -562,7 +542,6 @@ Example: For more information about the syntax for the Lucene Query Parser, see the {lucene-javadocs}/queryparser/org/apache/lucene/queryparser/classic/package-summary.html[Classic QueryParser javadocs]. -[[OtherParsers-LearningToRankQueryParser]] == Learning To Rank Query Parser The `LTRQParserPlugin` is a special purpose parser for reranking the top results of a simple query using a more complex ranking query which is based on a machine learnt model. @@ -576,7 +555,6 @@ Example: Details about using the `LTRQParserPlugin` can be found in the <> section. -[[OtherParsers-MaxScoreQueryParser]] == Max Score Query Parser The `MaxScoreQParser` extends the `LuceneQParser` but returns the Max score from the clauses. It does this by wrapping all `SHOULD` clauses in a `DisjunctionMaxQuery` with tie=1.0. Any `MUST` or `PROHIBITED` clauses are passed through as-is. Non-boolean queries, e.g., NumericRange falls-through to the `LuceneQParser` parser behavior. @@ -588,7 +566,6 @@ Example: {!maxscore tie=0.01}C OR (D AND E) ---- -[[OtherParsers-MoreLikeThisQueryParser]] == More Like This Query Parser `MLTQParser` enables retrieving documents that are similar to a given document. It uses Lucene's existing `MoreLikeThis` logic and also works in SolrCloud mode. The document identifier used here is the unique id value and not the Lucene internal document id. The list of returned documents excludes the queried document. @@ -638,7 +615,6 @@ Adding more constraints to what qualifies as similar using mintf and mindf. {!mlt qf=name mintf=2 mindf=3}1 ---- -[[OtherParsers-NestedQueryParser]] == Nested Query Parser The `NestedParser` extends the `QParserPlugin` and creates a nested query, with the ability for that query to redefine its type via local parameters. This is useful in specifying defaults in configuration and letting clients indirectly reference them. @@ -662,7 +638,6 @@ If the `q1` parameter is price, then the query would be a function query on the For more information about the possibilities of nested queries, see Yonik Seeley's blog post https://lucidworks.com/2009/03/31/nested-queries-in-solr/[Nested Queries in Solr]. -[[OtherParsers-PayloadQueryParsers]] == Payload Query Parsers These query parsers utilize payloads encoded on terms during indexing. 
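How those payloads get attached to terms is a schema concern; as a hedged sketch (the field type name is invented for illustration), a field type that encodes delimited float payloads at index time might look like:

[source,xml]
----
<fieldType name="text_payload_float" class="solr.TextField">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- input such as "term|1.5" is indexed as the term "term" carrying a float payload of 1.5 -->
    <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
  </analyzer>
</fieldType>
----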
@@ -672,7 +647,6 @@ The main query, for both of these parsers, is parsed straightforwardly from the * `PayloadScoreQParser` * `PayloadCheckQParser` -[[OtherParsers-PayloadScoreParser]] === Payload Score Parser `PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores. @@ -695,7 +669,6 @@ If `true`, multiples computed payload factor by the score of the original query. {!payload_score f=my_field_dpf v=some_term func=max} ---- -[[OtherParsers-PayloadCheckParser]] === Payload Check Parser `PayloadCheckQParser` only matches when the matching terms also have the specified payloads. @@ -719,7 +692,6 @@ Each specified payload will be encoded using the encoder determined from the fie {!payload_check f=words_dps payloads="VERB NOUN"}searching stuff ---- -[[OtherParsers-PrefixQueryParser]] == Prefix Query Parser `PrefixQParser` extends the `QParserPlugin` by creating a prefix query from the input value. Currently no analysis or value transformation is done to create this prefix query. @@ -735,7 +707,6 @@ Example: This would be generally equivalent to the Lucene query parser expression `myfield:foo*`. -[[OtherParsers-RawQueryParser]] == Raw Query Parser `RawQParser` extends the `QParserPlugin` by creating a term query from the input value without any text analysis or transformation. This is useful in debugging, or when raw terms are returned from the terms component (this is not the default). @@ -751,18 +722,16 @@ Example: This example constructs the query: `TermQuery(Term("myfield","Foo Bar"))`. -For easy filter construction to drill down in faceting, the <> is recommended. +For easy filter construction to drill down in faceting, the <> is recommended. -For full analysis on all fields, including text fields, you may want to use the <>. +For full analysis on all fields, including text fields, you may want to use the <>. -[[OtherParsers-Re-RankingQueryParser]] == Re-Ranking Query Parser The `ReRankQParserPlugin` is a special purpose parser for Re-Ranking the top results of a simple query using a more complex ranking query. Details about using the `ReRankQParserPlugin` can be found in the <> section. -[[OtherParsers-SimpleQueryParser]] == Simple Query Parser The Simple query parser in Solr is based on Lucene's SimpleQueryParser. This query parser is designed to allow users to enter queries however they want, and it will do its best to interpret the query and return results. @@ -811,14 +780,12 @@ Defines the default field if none is defined in the Schema, or overrides the def Any errors in syntax are ignored and the query parser will interpret queries as best it can. However, this can lead to odd results in some cases. -[[OtherParsers-SpatialQueryParsers]] == Spatial Query Parsers There are two spatial QParsers in Solr: `geofilt` and `bbox`. But there are other ways to query spatially: using the `frange` parser with a distance function, using the standard (lucene) query parser with the range syntax to pick the corners of a rectangle, or with RPT and BBoxField you can use the standard query parser but use a special syntax within quotes that allows you to pick the spatial predicate. All these options are documented further in the section <>. -[[OtherParsers-SurroundQueryParser]] == Surround Query Parser The `SurroundQParser` enables the Surround query syntax, which provides proximity search functionality. There are two positional operators: `w` creates an ordered span query and `n` creates an unordered one. 
Both operators take a numeric value to indicate distance between two terms. The default is 1, and the maximum is 99. @@ -838,7 +805,6 @@ This query parser will also accept boolean operators (`AND`, `OR`, and `NOT`, in The non-unary operators (everything but `NOT`) support both infix `(a AND b AND c)` and prefix `AND(a, b, c)` notation. -[[OtherParsers-SwitchQueryParser]] == Switch Query Parser `SwitchQParser` is a `QParserPlugin` that acts like a "switch" or "case" statement. @@ -895,7 +861,6 @@ Using the example configuration below, clients can optionally specify the custom ---- -[[OtherParsers-TermQueryParser]] == Term Query Parser `TermQParser` extends the `QParserPlugin` by creating a single term query from the input value equivalent to `readableToIndexed()`. This is useful for generating filter queries from the external human readable terms returned by the faceting or terms components. The only parameter is `f`, for the field. @@ -907,14 +872,13 @@ Example: {!term f=weight}1.5 ---- -For text fields, no analysis is done since raw terms are already returned from the faceting and terms components. To apply analysis to text fields as well, see the <>, above. +For text fields, no analysis is done since raw terms are already returned from the faceting and terms components. To apply analysis to text fields as well, see the <>, above. -If no analysis or transformation is desired for any type of field, see the <>, above. +If no analysis or transformation is desired for any type of field, see the <>, above. -[[OtherParsers-TermsQueryParser]] == Terms Query Parser -`TermsQParser` functions similarly to the <> but takes in multiple values separated by commas and returns documents matching any of the specified values. +`TermsQParser` functions similarly to the <> but takes in multiple values separated by commas and returns documents matching any of the specified values. This can be useful for generating filter queries from the external human readable terms returned by the faceting or terms components, and may be more efficient in some cases than using the <> to generate an boolean query since the default implementation `method` avoids scoring. @@ -929,7 +893,6 @@ Separator to use when parsing the input. If set to " " (a single blank space), w `method`:: The internal query-building implementation: `termsFilter`, `booleanQuery`, `automaton`, or `docValuesTermsFilter`. Defaults to `termsFilter`. - *Examples* [source,text] @@ -942,7 +905,6 @@ The internal query-building implementation: `termsFilter`, `booleanQuery`, `auto {!terms f=categoryId method=booleanQuery separator=" "}8 6 7 5309 ---- -[[OtherParsers-XMLQueryParser]] == XML Query Parser The {solr-javadocs}/solr-core/org/apache/solr/search/XmlQParserPlugin.html[XmlQParserPlugin] extends the {solr-javadocs}/solr-core/org/apache/solr/search/QParserPlugin.html[QParserPlugin] and supports the creation of queries from XML. Example: @@ -1002,7 +964,6 @@ The XmlQParser implementation uses the {solr-javadocs}/solr-core/org/apache/solr | |LegacyNumericRangeQuery(Builder) is deprecated |=== -[[OtherParsers-CustomizingXMLQueryParser]] === Customizing XML Query Parser You can configure your own custom query builders for additional XML elements. The custom builders need to extend the {solr-javadocs}/solr-core/org/apache/solr/search/SolrQueryBuilder.html[SolrQueryBuilder] or the {solr-javadocs}/solr-core/org/apache/solr/search/SolrSpanQueryBuilder.html[SolrSpanQueryBuilder] class. 
Example solrconfig.xml snippet: diff --git a/solr/solr-ref-guide/src/other-schema-elements.adoc b/solr/solr-ref-guide/src/other-schema-elements.adoc index 029cd641185..cd39401b238 100644 --- a/solr/solr-ref-guide/src/other-schema-elements.adoc +++ b/solr/solr-ref-guide/src/other-schema-elements.adoc @@ -20,7 +20,6 @@ This section describes several other important elements of `schema.xml` not covered in earlier sections. -[[OtherSchemaElements-UniqueKey]] == Unique Key The `uniqueKey` element specifies which field is a unique identifier for documents. Although `uniqueKey` is not required, it is nearly always warranted by your application design. For example, `uniqueKey` should be used if you will ever update a document in the index. @@ -37,7 +36,6 @@ Schema defaults and `copyFields` cannot be used to populate the `uniqueKey` fiel Further, the operation will fail if the `uniqueKey` field is used, but is multivalued (or inherits the multivalue-ness from the `fieldtype`). However, `uniqueKey` will continue to work, as long as the field is properly used. -[[OtherSchemaElements-Similarity]] == Similarity Similarity is a Lucene class used to score a document in searching. diff --git a/solr/solr-ref-guide/src/overview-of-searching-in-solr.adoc b/solr/solr-ref-guide/src/overview-of-searching-in-solr.adoc index 60ae891eee8..4389c5f3bec 100644 --- a/solr/solr-ref-guide/src/overview-of-searching-in-solr.adoc +++ b/solr/solr-ref-guide/src/overview-of-searching-in-solr.adoc @@ -54,7 +54,7 @@ Faceting makes use of fields defined when the search applications were indexed. Solr also supports a feature called <>, which enables users to submit new queries that focus on particular terms returned in an earlier query. MoreLikeThis queries can make use of faceting or clustering to provide additional aid to users. -A Solr component called a <> manages the final presentation of the query response. Solr includes a variety of response writers, including an <> and a <>. +A Solr component called a <> manages the final presentation of the query response. Solr includes a variety of response writers, including an <> and a <>. The diagram below summarizes some key elements of the search process. diff --git a/solr/solr-ref-guide/src/pagination-of-results.adoc b/solr/solr-ref-guide/src/pagination-of-results.adoc index a9c83680205..130a6c70cfd 100644 --- a/solr/solr-ref-guide/src/pagination-of-results.adoc +++ b/solr/solr-ref-guide/src/pagination-of-results.adoc @@ -24,7 +24,7 @@ In most search applications, the "top" matching results (sorted by score, or som In many applications the UI for these sorted results are displayed to the user in "pages" containing a fixed number of matching results, and users don't typically look at results past the first few pages worth of results. == Basic Pagination -In Solr, this basic paginated searching is supported using the `start` and `rows` parameters, and performance of this common behaviour can be tuned by utilizing the <> and adjusting the <> configuration options based on your expected page sizes. +In Solr, this basic paginated searching is supported using the `start` and `rows` parameters, and performance of this common behaviour can be tuned by utilizing the <> and adjusting the <> configuration options based on your expected page sizes. 
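To make the mechanics concrete, a request such as the following (a minimal sketch using the standard `techproducts` example collection) fetches the third page of ten results sorted by price:

[source,bash]
----
# Skip the first 20 matches (start=20) and return the next 10 (rows=10), i.e., "page 3" at 10 results per page
curl 'http://localhost:8983/solr/techproducts/select?q=*:*&sort=price+asc&start=20&rows=10'
----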
=== Basic Pagination Examples @@ -103,7 +103,7 @@ There are a few important constraints to be aware of when using `cursorMark` par * If `id` is your uniqueKey field, then sort params like `id asc` and `name asc, id desc` would both work fine, but `name asc` by itself would not . Sorts including <> based functions that involve calculations relative to `NOW` will cause confusing results, since every document will get a new sort value on every subsequent request. This can easily result in cursors that never end, and constantly return the same documents over and over – even if the documents are never updated. + -In this situation, choose & re-use a fixed value for the <> in all of your cursor requests. +In this situation, choose & re-use a fixed value for the <> in all of your cursor requests. Cursor mark values are computed based on the sort values of each document in the result, which means multiple documents with identical sort values will produce identical Cursor mark values if one of them is the last document on a page of results. In that situation, the subsequent request using that `cursorMark` would not know which of the documents with the identical mark values should be skipped. Requiring that the uniqueKey field be used as a clause in the sort criteria guarantees that a deterministic ordering will be returned, and that every `cursorMark` value will identify a unique point in the sequence of documents. diff --git a/solr/solr-ref-guide/src/performance-statistics-reference.adoc b/solr/solr-ref-guide/src/performance-statistics-reference.adoc index 50bc6011b64..99878504578 100644 --- a/solr/solr-ref-guide/src/performance-statistics-reference.adoc +++ b/solr/solr-ref-guide/src/performance-statistics-reference.adoc @@ -24,7 +24,7 @@ The same statistics are also exposed via the <> in the Filter Descriptions section. +For examples of how to use this encoding in your analyzer, see <> in the Filter Descriptions section. Beider-Morse Phonetic Matching (BMPM) is a "soundalike" tool that lets you search using a new phonetic matching system. BMPM helps you search for personal names (or just surnames) in a Solr/Lucene index, and is far superior to the existing phonetic codecs, such as regular soundex, metaphone, caverphone, etc. @@ -59,7 +57,7 @@ For more information, see here: http://stevemorse.org/phoneticinfo.htm and http: == Daitch-Mokotoff Soundex -To use this encoding in your analyzer, see <> in the Filter Descriptions section. +To use this encoding in your analyzer, see <> in the Filter Descriptions section. The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation but differences in spelling. @@ -76,13 +74,13 @@ For more information, see http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_S == Double Metaphone -To use this encoding in your analyzer, see <> in the Filter Descriptions section. Alternatively, you may specify `encoding="DoubleMetaphone"` with the <>, but note that the Phonetic Filter version will *not* provide the second ("alternate") encoding that is generated by the Double Metaphone Filter for some tokens. +To use this encoding in your analyzer, see <> in the Filter Descriptions section. Alternatively, you may specify `encoding="DoubleMetaphone"` with the <>, but note that the Phonetic Filter version will *not* provide the second ("alternate") encoding that is generated by the Double Metaphone Filter for some tokens. 
Encodes tokens using the double metaphone algorithm by Lawrence Philips. See the original article at http://www.drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2 == Metaphone -To use this encoding in your analyzer, specify `encoding="Metaphone"` with the <>. +To use this encoding in your analyzer, specify `encoding="Metaphone"` with the <>. Encodes tokens using the Metaphone algorithm by Lawrence Philips, described in "Hanging on the Metaphone" in Computer Language, Dec. 1990. @@ -91,7 +89,7 @@ Another reference for more information is http://www.drdobbs.com/the-double-meta == Soundex -To use this encoding in your analyzer, specify `encoding="Soundex"` with the <>. +To use this encoding in your analyzer, specify `encoding="Soundex"` with the <>. Encodes tokens using the Soundex algorithm, which is used to relate similar names, but can also be used as a general purpose scheme to find words with similar phonemes. @@ -99,7 +97,7 @@ See also http://en.wikipedia.org/wiki/Soundex. == Refined Soundex -To use this encoding in your analyzer, specify `encoding="RefinedSoundex"` with the <>. +To use this encoding in your analyzer, specify `encoding="RefinedSoundex"` with the <>. Encodes tokens using an improved version of the Soundex algorithm. @@ -107,7 +105,7 @@ See http://en.wikipedia.org/wiki/Soundex. == Caverphone -To use this encoding in your analyzer, specify `encoding="Caverphone"` with the <>. +To use this encoding in your analyzer, specify `encoding="Caverphone"` with the <>. Caverphone is an algorithm created by the Caversham Project at the University of Otago. The algorithm is optimised for accents present in the southern part of the city of Dunedin, New Zealand. @@ -115,7 +113,7 @@ See http://en.wikipedia.org/wiki/Caverphone and the Caverphone 2.0 specification == Kölner Phonetik a.k.a. Cologne Phonetic -To use this encoding in your analyzer, specify `encoding="ColognePhonetic"` with the <>. +To use this encoding in your analyzer, specify `encoding="ColognePhonetic"` with the <>. The Kölner Phonetik, an algorithm published by Hans Joachim Postel in 1969, is optimized for the German language. @@ -123,7 +121,7 @@ See http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik == NYSIIS -To use this encoding in your analyzer, specify `encoding="Nysiis"` with the <>. +To use this encoding in your analyzer, specify `encoding="Nysiis"` with the <>. NYSIIS is an encoding used to relate similar names, but can also be used as a general purpose scheme to find words with similar phonemes. diff --git a/solr/solr-ref-guide/src/post-tool.adoc b/solr/solr-ref-guide/src/post-tool.adoc index 80e74d443d1..1cbaa92f8e3 100644 --- a/solr/solr-ref-guide/src/post-tool.adoc +++ b/solr/solr-ref-guide/src/post-tool.adoc @@ -20,7 +20,7 @@ Solr includes a simple command line tool for POSTing various types of content to a Solr server. -The tool is `bin/post`. The bin/post tool is a Unix shell script; for Windows (non-Cygwin) usage, see the <> below. +The tool is `bin/post`. The bin/post tool is a Unix shell script; for Windows (non-Cygwin) usage, see the section <> below. To run it, open a window and enter: @@ -31,7 +31,6 @@ bin/post -c gettingstarted example/films/films.json This will contact the server at `localhost:8983`. Specifying the `collection/core name` is *mandatory*. The `-help` (or simply `-h`) option will output information on its usage (i.e., `bin/post -help)`. - == Using the bin/post Tool Specifying either the `collection/core name` or the full update `url` is *mandatory* when using `bin/post`. 
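For instance, either of the following invocations satisfies that requirement (a hedged sketch; the collection name and update URL shown are illustrative):

[source,bash]
----
# Name the target collection/core; host and port default to localhost:8983
bin/post -c gettingstarted example/exampledocs/*.xml

# Or spell out the full update URL instead of a collection name (illustrative endpoint)
bin/post -url http://localhost:8983/solr/gettingstarted/update example/exampledocs/*.xml
----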
@@ -74,8 +73,7 @@ OPTIONS ... ---- -[[bin_post_examples]] -== Examples +== Examples Using bin/post There are several ways to use `bin/post`. This section presents several examples. @@ -118,7 +116,7 @@ Index a tab-separated file into `gettingstarted`: bin/post -c signals -params "separator=%09" -type text/csv data.tsv ---- -The content type (`-type`) parameter is required to treat the file as the proper type, otherwise it will be ignored and a WARNING logged as it does not know what type of content a .tsv file is. The <> supports the `separator` parameter, and is passed through using the `-params` setting. +The content type (`-type`) parameter is required to treat the file as the proper type, otherwise it will be ignored and a WARNING logged as it does not know what type of content a .tsv file is. The <> supports the `separator` parameter, and is passed through using the `-params` setting. === Indexing JSON @@ -161,8 +159,7 @@ Index a pdf as the user solr with password `SolrRocks`: bin/post -u solr:SolrRocks -c gettingstarted a.pdf ---- -[[PostTool-WindowsSupport]] -== Windows Support +== Post Tool Windows Support `bin/post` exists currently only as a Unix shell script, however it delegates its work to a cross-platform capable Java program. The <> can be run directly in supported environments, including Windows. diff --git a/solr/solr-ref-guide/src/query-settings-in-solrconfig.adoc b/solr/solr-ref-guide/src/query-settings-in-solrconfig.adoc index 1a6b315ea0a..09a8f0a4753 100644 --- a/solr/solr-ref-guide/src/query-settings-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/query-settings-in-solrconfig.adoc @@ -29,7 +29,6 @@ These settings are all configured in child elements of the `` element in ---- -[[QuerySettingsinSolrConfig-Caches]] == Caches Solr caches are associated with a specific instance of an Index Searcher, a specific view of an index that doesn't change during the lifetime of that searcher. As long as that Index Searcher is being used, any items in its cache will be valid and available for reuse. Caching in Solr differs from caching in many other applications in that cached Solr objects do not expire after a time interval; instead, they remain valid for the lifetime of the Index Searcher. @@ -54,7 +53,6 @@ FastLRUCache and LFUCache support `showItems` attribute. This is the number of c Details of each cache are described below. -[[QuerySettingsinSolrConfig-filterCache]] === filterCache This cache is used by `SolrIndexSearcher` for filters (DocSets) for unordered sets of all documents that match a query. The numeric attributes control the number of entries in the cache. @@ -71,7 +69,6 @@ Solr also uses this cache for faceting when the configuration parameter `facet.m autowarmCount="128"/> ---- -[[QuerySettingsinSolrConfig-queryResultCache]] === queryResultCache This cache holds the results of previous searches: ordered lists of document IDs (DocList) based on a query, a sort, and the range of documents requested. @@ -87,7 +84,6 @@ The `queryResultCache` has an additional (optional) setting to limit the maximum maxRamMB="1000"/> ---- -[[QuerySettingsinSolrConfig-documentCache]] === documentCache This cache holds Lucene Document objects (the stored fields for each document). Since Lucene internal document IDs are transient, this cache is not auto-warmed. The size for the `documentCache` should always be greater than `max_results` times the `max_concurrent_queries`, to ensure that Solr does not need to refetch a document during a request. 
The more fields you store in your documents, the higher the memory usage of this cache will be. @@ -100,7 +96,6 @@ This cache holds Lucene Document objects (the stored fields for each document). autowarmCount="0"/> ---- -[[QuerySettingsinSolrConfig-UserDefinedCaches]] === User Defined Caches You can also define named caches for your own application code to use. You can locate and use your cache object by name by calling the `SolrIndexSearcher` methods `getCache()`, `cacheLookup()` and `cacheInsert()`. @@ -116,10 +111,8 @@ You can also define named caches for your own application code to use. You can l If you want auto-warming of your cache, include a `regenerator` attribute with the fully qualified name of a class that implements `solr.search.CacheRegenerator`. You can also use the `NoOpRegenerator`, which simply repopulates the cache with old items. Define it with the `regenerator` parameter as`: regenerator="solr.NoOpRegenerator"`. -[[QuerySettingsinSolrConfig-QuerySizingandWarming]] == Query Sizing and Warming -[[QuerySettingsinSolrConfig-maxBooleanClauses]] === maxBooleanClauses This sets the maximum number of clauses allowed in a boolean query. This can affect range or prefix queries that expand to a query with a large number of boolean terms. If this limit is exceeded, an exception is thrown. @@ -134,7 +127,6 @@ This sets the maximum number of clauses allowed in a boolean query. This can aff This option modifies a global property that effects all Solr cores. If multiple `solrconfig.xml` files disagree on this property, the value at any point in time will be based on the last Solr core that was initialized. ==== -[[QuerySettingsinSolrConfig-enableLazyFieldLoading]] === enableLazyFieldLoading If this parameter is set to true, then fields that are not directly requested will be loaded lazily as needed. This can boost performance if the most common queries only need a small subset of fields, especially if infrequently accessed fields are large in size. @@ -144,7 +136,6 @@ If this parameter is set to true, then fields that are not directly requested wi true ---- -[[QuerySettingsinSolrConfig-useFilterForSortedQuery]] === useFilterForSortedQuery This parameter configures Solr to use a filter to satisfy a search. If the requested sort does not include "score", the `filterCache` will be checked for a filter matching the query. For most situations, this is only useful if the same search is requested often with different sort options and none of them ever use "score". @@ -154,7 +145,6 @@ This parameter configures Solr to use a filter to satisfy a search. If the reque true ---- -[[QuerySettingsinSolrConfig-queryResultWindowSize]] === queryResultWindowSize Used with the `queryResultCache`, this will cache a superset of the requested number of document IDs. For example, if the a search in response to a particular query requests documents 10 through 19, and `queryWindowSize` is 50, documents 0 through 49 will be cached. @@ -164,7 +154,6 @@ Used with the `queryResultCache`, this will cache a superset of the requested nu 20 ---- -[[QuerySettingsinSolrConfig-queryResultMaxDocsCached]] === queryResultMaxDocsCached This parameter sets the maximum number of documents to cache for any entry in the `queryResultCache`. 
@@ -174,7 +163,6 @@ This parameter sets the maximum number of documents to cache for any entry in th 200 ---- -[[QuerySettingsinSolrConfig-useColdSearcher]] === useColdSearcher This setting controls whether search requests for which there is not a currently registered searcher should wait for a new searcher to warm up (false) or proceed immediately (true). When set to "false", requests will block until the searcher has warmed its caches. @@ -184,7 +172,6 @@ This setting controls whether search requests for which there is not a currently false ---- -[[QuerySettingsinSolrConfig-maxWarmingSearchers]] === maxWarmingSearchers This parameter sets the maximum number of searchers that may be warming up in the background at any given time. Exceeding this limit will raise an error. For read-only slaves, a value of two is reasonable. Masters should probably be set a little higher. @@ -194,10 +181,9 @@ This parameter sets the maximum number of searchers that may be warming up in th 2 ---- -[[QuerySettingsinSolrConfig-Query-RelatedListeners]] == Query-Related Listeners -As described in the section on <>, new Index Searchers are cached. It's possible to use the triggers for listeners to perform query-related tasks. The most common use of this is to define queries to further "warm" the Index Searchers while they are starting. One benefit of this approach is that field caches are pre-populated for faster sorting. +As described in the section on <>, new Index Searchers are cached. It's possible to use the triggers for listeners to perform query-related tasks. The most common use of this is to define queries to further "warm" the Index Searchers while they are starting. One benefit of this approach is that field caches are pre-populated for faster sorting. Good query selection is key with this type of listener. It's best to choose your most common and/or heaviest queries and include not just the keywords used, but any other parameters such as sorting or filtering requests. diff --git a/solr/solr-ref-guide/src/read-and-write-side-fault-tolerance.adoc b/solr/solr-ref-guide/src/read-and-write-side-fault-tolerance.adoc index 947c76017f2..9f9f0412ec7 100644 --- a/solr/solr-ref-guide/src/read-and-write-side-fault-tolerance.adoc +++ b/solr/solr-ref-guide/src/read-and-write-side-fault-tolerance.adoc @@ -22,14 +22,12 @@ SolrCloud supports elasticity, high availability, and fault tolerance in reads a What this means, basically, is that when you have a large cluster, you can always make requests to the cluster: Reads will return results whenever possible, even if some nodes are down, and Writes will be acknowledged only if they are durable; i.e., you won't lose data. -[[ReadandWriteSideFaultTolerance-ReadSideFaultTolerance]] == Read Side Fault Tolerance In a SolrCloud cluster each individual node load balances read requests across all the replicas in collection. You still need a load balancer on the 'outside' that talks to the cluster, or you need a smart client which understands how to read and interact with Solr's metadata in ZooKeeper and only requests the ZooKeeper ensemble's address to start discovering to which nodes it should send requests. (Solr provides a smart Java SolrJ client called {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/impl/CloudSolrClient.html[CloudSolrClient].) 
Even if some nodes in the cluster are offline or unreachable, a Solr node will be able to correctly respond to a search request as long as it can communicate with at least one replica of every shard, or one replica of every _relevant_ shard if the user limited the search via the `shards` or `\_route_` parameters. The more replicas there are of every shard, the more likely that the Solr cluster will be able to handle search results in the event of node failures. -[[ReadandWriteSideFaultTolerance-zkConnected]] === zkConnected A Solr node will return the results of a search request as long as it can communicate with at least one replica of every shard that it knows about, even if it can _not_ communicate with ZooKeeper at the time it receives the request. This is normally the preferred behavior from a fault tolerance standpoint, but may result in stale or incorrect results if there have been major changes to the collection structure that the node has not been informed of via ZooKeeper (i.e., shards may have been added or removed, or split into sub-shards) @@ -56,7 +54,6 @@ A `zkConnected` header is included in every search response indicating if the no } ---- -[[ReadandWriteSideFaultTolerance-shards.tolerant]] === shards.tolerant In the event that one or more shards queried are completely unavailable, then Solr's default behavior is to fail the request. However, there are many use-cases where partial results are acceptable and so Solr provides a boolean `shards.tolerant` parameter (default `false`). @@ -89,12 +86,10 @@ Example response with `partialResults` flag set to 'true': } ---- -[[ReadandWriteSideFaultTolerance-WriteSideFaultTolerance]] == Write Side Fault Tolerance SolrCloud is designed to replicate documents to ensure redundancy for your data, and enable you to send update requests to any node in the cluster. That node will determine if it hosts the leader for the appropriate shard, and if not it will forward the request to the the leader, which will then forward it to all existing replicas, using versioning to make sure every replica has the most up-to-date version. If the leader goes down, another replica can take its place. This architecture enables you to be certain that your data can be recovered in the event of a disaster, even if you are using <>. -[[ReadandWriteSideFaultTolerance-Recovery]] === Recovery A Transaction Log is created for each node so that every change to content or organization is noted. The log is used to determine which content in the node should be included in a replica. When a new replica is created, it refers to the Leader and the Transaction Log to know which content to include. If it fails, it retries. @@ -105,7 +100,6 @@ If a leader goes down, it may have sent requests to some replicas and not others If an update fails because cores are reloading schemas and some have finished but others have not, the leader tells the nodes that the update failed and starts the recovery procedure. -[[ReadandWriteSideFaultTolerance-AchievedReplicationFactor]] === Achieved Replication Factor When using a replication factor greater than one, an update request may succeed on the shard leader but fail on one or more of the replicas. For instance, consider a collection with one shard and a replication factor of three. In this case, you have a shard leader and two additional replicas. If an update request succeeds on the leader but fails on both replicas, for whatever reason, the update request is still considered successful from the perspective of the client. 
The replicas that missed the update will sync with the leader when they recover. diff --git a/solr/solr-ref-guide/src/request-parameters-api.adoc b/solr/solr-ref-guide/src/request-parameters-api.adoc index 45275d0760e..81646a772d3 100644 --- a/solr/solr-ref-guide/src/request-parameters-api.adoc +++ b/solr/solr-ref-guide/src/request-parameters-api.adoc @@ -33,12 +33,10 @@ When might you want to use this feature? * To mix and match parameter sets at request time. * To avoid a reload of your collection for small parameter changes. -[[RequestParametersAPI-TheRequestParametersEndpoint]] == The Request Parameters Endpoint All requests are sent to the `/config/params` endpoint of the Config API. -[[RequestParametersAPI-SettingRequestParameters]] == Setting Request Parameters The request to set, unset, or update request parameters is sent as a set of Maps with names. These objects can be directly used in a request or a request handler definition. @@ -88,7 +86,6 @@ curl http://localhost:8983/solr/techproducts/config/params -H 'Content-type:appl }' ---- -[[RequestParametersAPI-UsingRequestParameterswithRequestHandlers]] == Using Request Parameters with RequestHandlers After creating the `my_handler_params` paramset in the above section, it is possible to define a request handler as follows: @@ -119,12 +116,10 @@ It will be equivalent to a standard request handler definition such as this one: ---- -[[RequestParametersAPI-ImplicitRequestHandlers]] -=== Implicit RequestHandlers +=== Implicit RequestHandlers with the Request Parameters API Solr ships with many out-of-the-box request handlers that may only be configured via the Request Parameters API, because their configuration is not present in `solrconfig.xml`. See <> for the paramset to use when configuring an implicit request handler. -[[RequestParametersAPI-ViewingExpandedParamsetsandEffectiveParameterswithRequestHandlers]] === Viewing Expanded Paramsets and Effective Parameters with RequestHandlers To see the expanded paramset and the resulting effective parameters for a RequestHandler defined with `useParams`, use the `expandParams` request param. E.g. for the `/export` request handler: @@ -134,7 +129,6 @@ To see the expanded paramset and the resulting effective parameters for a Reques curl http://localhost:8983/solr/techproducts/config/requestHandler?componentName=/export&expandParams=true ---- -[[RequestParametersAPI-ViewingRequestParameters]] == Viewing Request Parameters To see the paramsets that have been created, you can use the `/config/params` endpoint to read the contents of `params.json`, or use the name in the request: @@ -147,7 +141,6 @@ curl http://localhost:8983/solr/techproducts/config/params curl http://localhost:8983/solr/techproducts/config/params/myQueries ---- -[[RequestParametersAPI-TheuseParamsParameter]] == The useParams Parameter When making a request, the `useParams` parameter applies the request parameters sent to the request. This is translated at request time to the actual parameters. @@ -192,12 +185,10 @@ To summarize, parameters are applied in this order: * parameter sets defined in `params.json` that have been defined in the request handler. * parameters defined in `` in `solrconfig.xml`. -[[RequestParametersAPI-PublicAPIs]] == Public APIs The RequestParams Object can be accessed using the method `SolrConfig#getRequestParams()`. Each paramset can be accessed by their name using the method `RequestParams#getRequestParams(String name)`. 
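At query time, a paramset defined through this API is pulled in with the `useParams` request parameter; a minimal sketch, assuming the `myQueries` paramset shown earlier:

[source,bash]
----
# Apply the stored 'myQueries' paramset to an ad hoc query
curl 'http://localhost:8983/solr/techproducts/select?q=memory&useParams=myQueries'
----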
-[[RequestParametersAPI-Examples]] -== Examples +== Examples Using the Request Parameters API -The Solr "films" example demonstrates the use of the parameters API. See https://github.com/apache/lucene-solr/tree/master/solr/example/films for details. +The Solr "films" example demonstrates the use of the parameters API. You can use this example in your Solr installation (in the `example/films` directory) or view the files in the Apache GitHub mirror at https://github.com/apache/lucene-solr/tree/master/solr/example/films. diff --git a/solr/solr-ref-guide/src/requestdispatcher-in-solrconfig.adoc b/solr/solr-ref-guide/src/requestdispatcher-in-solrconfig.adoc index e20b55ccc4f..6271cb68b9f 100644 --- a/solr/solr-ref-guide/src/requestdispatcher-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/requestdispatcher-in-solrconfig.adoc @@ -22,7 +22,6 @@ The `requestDispatcher` element of `solrconfig.xml` controls the way the Solr HT Included are parameters for defining if it should handle `/select` urls (for Solr 1.1 compatibility), if it will support remote streaming, the maximum size of file uploads and how it will respond to HTTP cache headers in requests. -[[RequestDispatcherinSolrConfig-handleSelectElement]] == handleSelect Element [IMPORTANT] @@ -41,7 +40,6 @@ In recent versions of Solr, a `/select` requestHandler is defined by default, so ---- -[[RequestDispatcherinSolrConfig-requestParsersElement]] == requestParsers Element The `` sub-element controls values related to parsing requests. This is an empty XML element that doesn't have any content, only attributes. @@ -67,7 +65,7 @@ The attribute `addHttpRequestToContext` can be used to indicate that the origina addHttpRequestToContext="false" /> ---- -The below command is an example of how to enable RemoteStreaming and BodyStreaming through <>: +The below command is an example of how to enable RemoteStreaming and BodyStreaming through <>: [source,bash] ---- @@ -77,7 +75,6 @@ curl http://localhost:8983/solr/gettingstarted/config -H 'Content-type:applicati }' ---- -[[RequestDispatcherinSolrConfig-httpCachingElement]] == httpCaching Element The `` element controls HTTP cache control headers. Do not confuse these settings with Solr's internal cache configuration. This element controls caching of HTTP responses as defined by the W3C HTTP specifications. @@ -102,7 +99,6 @@ This value of this attribute is sent as the value of the `ETag` header. Changing ---- -[[RequestDispatcherinSolrConfig-cacheControlElement]] === cacheControl Element In addition to these attributes, `` accepts one child element: ``. The content of this element will be sent as the value of the Cache-Control header on HTTP responses. This header is used to modify the default caching behavior of the requesting client. The possible values for the Cache-Control header are defined by the HTTP 1.1 specification in http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9[Section 14.9]. diff --git a/solr/solr-ref-guide/src/requesthandlers-and-searchcomponents-in-solrconfig.adoc b/solr/solr-ref-guide/src/requesthandlers-and-searchcomponents-in-solrconfig.adoc index 46d9c9ebece..10fababcf1c 100644 --- a/solr/solr-ref-guide/src/requesthandlers-and-searchcomponents-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/requesthandlers-and-searchcomponents-in-solrconfig.adoc @@ -26,7 +26,6 @@ A _search component_ is a feature of search, such as highlighting or faceting. T These are often referred to as "requestHandler" and "searchComponent", which is how they are defined in `solrconfig.xml`. 
-[[RequestHandlersandSearchComponentsinSolrConfig-RequestHandlers]] == Request Handlers Every request handler is defined with a name and a class. The name of the request handler is referenced with the request to Solr, typically as a path. For example, if Solr is installed at ` http://localhost:8983/solr/ `and you have a collection named "```gettingstarted```", you can make a request using URLs like this: @@ -44,7 +43,6 @@ Request handlers can also process requests for nested paths of their names, for It is also possible to configure defaults for request handlers with a section called `initParams`. These defaults can be used when you want to have common properties that will be used by each separate handler. For example, if you intend to create several request handlers that will all request the same list of fields in the response, you can configure an `initParams` section with your list of fields. For more information about `initParams`, see the section <>. -[[RequestHandlersandSearchComponentsinSolrConfig-SearchHandlers]] === SearchHandlers The primary request handler defined with Solr by default is the "SearchHandler", which handles search queries. The request handler is defined, and then a list of defaults for the handler are defined with a `defaults` list. @@ -91,33 +89,28 @@ In this example, the filter query "inStock:true" will always be added to every q + In this example, facet fields have been defined which limits the facets that will be returned by Solr. If the client requests facets, the facets defined with a configuration like this are the only facets they will see. -The final section of a request handler definition is `components`, which defines a list of search components that can be used with a request handler. They are only registered with the request handler. How to define a search component is discussed further on in the section on <>. The `components` element can only be used with a request handler that is a SearchHandler. +The final section of a request handler definition is `components`, which defines a list of search components that can be used with a request handler. They are only registered with the request handler. How to define a search component is discussed further on in the section on <> below. The `components` element can only be used with a request handler that is a SearchHandler. The `solrconfig.xml` file includes many other examples of SearchHandlers that can be used or modified as needed. -[[RequestHandlersandSearchComponentsinSolrConfig-UpdateRequestHandlers]] === UpdateRequestHandlers The UpdateRequestHandlers are request handlers which process updates to the index. In this guide, we've covered these handlers in detail in the section <>. -[[RequestHandlersandSearchComponentsinSolrConfig-ShardHandlers]] === ShardHandlers It is possible to configure a request handler to search across shards of a cluster, used with distributed search. More information about distributed search and how to configure the shardHandler is in the section <>. -[[RequestHandlersandSearchComponentsinSolrConfig-ImplicitRequestHandlers]] === Implicit Request Handlers Solr includes many out-of-the-box request handlers that are not configured in `solrconfig.xml`, and so are referred to as "implicit" - see <>. -[[RequestHandlersandSearchComponentsinSolrConfig-SearchComponents]] == Search Components Search components define the logic that is used by the SearchHandler to perform queries for users. 
-[[RequestHandlersandSearchComponentsinSolrConfig-DefaultComponents]] === Default Components There are several default search components that work with all SearchHandlers without any additional configuration. If no components are defined (with the exception of `first-components` and `last-components` - see below), these are executed by default, in the following order: @@ -138,7 +131,6 @@ There are several default search components that work with all SearchHandlers wi If you register a new search component with one of these default names, the newly defined component will be used instead of the default. -[[RequestHandlersandSearchComponentsinSolrConfig-First-ComponentsandLast-Components]] === First-Components and Last-Components It's possible to define some components as being used before (with `first-components`) or after (with `last-components`) the default components listed above. @@ -158,7 +150,6 @@ It's possible to define some components as being used before (with `first-compon ---- -[[RequestHandlersandSearchComponentsinSolrConfig-Components]] === Components If you define `components`, the default components (see above) will not be executed, and `first-components` and `last-components` are disallowed: @@ -172,7 +163,6 @@ If you define `components`, the default components (see above) will not be execu ---- -[[RequestHandlersandSearchComponentsinSolrConfig-OtherUsefulComponents]] === Other Useful Components Many of the other useful components are described in sections of this Guide for the features they support. These are: diff --git a/solr/solr-ref-guide/src/response-writers.adoc b/solr/solr-ref-guide/src/response-writers.adoc index 947c8ea91ea..2c6113b47d3 100644 --- a/solr/solr-ref-guide/src/response-writers.adoc +++ b/solr/solr-ref-guide/src/response-writers.adoc @@ -25,23 +25,22 @@ Solr supports a variety of Response Writers to ensure that query responses can b The `wt` parameter selects the Response Writer to be used. The list below describe shows the most common settings for the `wt` parameter, with links to further sections that discuss them in more detail. -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> -[[ResponseWriters-TheStandardXMLResponseWriter]] -== The Standard XML Response Writer +== Standard XML Response Writer The XML Response Writer is the most general purpose and reusable Response Writer currently included with Solr. It is the format used in most discussions and documentation about the response of Solr queries. @@ -49,7 +48,6 @@ Note that the XSLT Response Writer can be used to convert the XML produced by th The behavior of the XML Response Writer can be driven by the following query parameters. -[[ResponseWriters-TheversionParameter]] === The version Parameter The `version` parameter determines the XML protocol used in the response. Clients are strongly encouraged to _always_ specify the protocol version, so as to ensure that the format of the response they receive does not change unexpectedly if the Solr server is upgraded and a new default format is introduced. @@ -58,8 +56,7 @@ The only currently supported version value is `2.2`. The format of the `response The default value is the latest supported. -[[ResponseWriters-ThestylesheetParameter]] -=== The stylesheet Parameter +=== stylesheet Parameter The `stylesheet` parameter can be used to direct Solr to include a `` declaration in the XML response it returns. 
@@ -70,27 +67,23 @@ The default behavior is not to return any stylesheet declaration at all. Use of the `stylesheet` parameter is discouraged, as there is currently no way to specify external stylesheets, and no stylesheets are provided in the Solr distributions. This is a legacy parameter, which may be developed further in a future release. ==== -[[ResponseWriters-TheindentParameter]] -=== The indent Parameter +=== indent Parameter If the `indent` parameter is used, and has a non-blank value, then Solr will make some attempts at indenting its XML response to make it more readable by humans. The default behavior is not to indent. -[[ResponseWriters-TheXSLTResponseWriter]] -== The XSLT Response Writer +== XSLT Response Writer The XSLT Response Writer applies an XML stylesheet to output. It can be used for tasks such as formatting results for an RSS feed. -[[ResponseWriters-trParameter]] === tr Parameter The XSLT Response Writer accepts one parameter: the `tr` parameter, which identifies the XML transformation to use. The transformation must be found in the Solr `conf/xslt` directory. The Content-Type of the response is set according to the `` statement in the XSLT transform, for example: `` -[[ResponseWriters-Configuration]] -=== Configuration +=== XSLT Configuration The example below, from the `sample_techproducts_configs` <> in the Solr distribution, shows how the XSLT Response Writer is configured. @@ -108,7 +101,6 @@ The example below, from the `sample_techproducts_configs` < ---- -[[ResponseWriters-JSON-SpecificParameters]] === JSON-Specific Parameters -[[ResponseWriters-json.nl]] ==== json.nl This parameter controls the output format of NamedLists, where order is more important than access by name. NamedList is currently used for field faceting data. @@ -196,7 +186,6 @@ NamedList is represented as an array of Name Type Value JSON objects. + With input of `NamedList("a"=1, "bar"="foo", null=3, null=null)`, the output would be `[{"name":"a","type":"int","value":1}, {"name":"bar","type":"str","value":"foo"}, {"name":null,"type":"int","value":3}, {"name":null,"type":"null","value":null}]`. -[[ResponseWriters-json.wrf]] ==== json.wrf `json.wrf=function` adds a wrapper-function around the JSON response, useful in AJAX with dynamic script tags for specifying a JavaScript callback function. @@ -204,17 +193,14 @@ With input of `NamedList("a"=1, "bar"="foo", null=3, null=null)`, the output wou * http://www.xml.com/pub/a/2005/12/21/json-dynamic-script-tag.html * http://www.theurer.cc/blog/2005/12/15/web-services-json-dump-your-proxy/ -[[ResponseWriters-BinaryResponseWriter]] == Binary Response Writer This is a custom binary format used by Solr for inter-node communication as well as client-server communication. SolrJ uses this as the default for indexing as well as querying. See <> for more details. -[[ResponseWriters-GeoJSONResponseWriter]] == GeoJSON Response Writer Returns Solr results in http://geojson.org[GeoJSON] augmented with Solr-specific JSON. To use this, set `wt=geojson` and `geojson.field` to the name of a spatial Solr field. Not all spatial fields types are supported, and you'll get an error if you use an unsupported one. 
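For example, a request along the following lines (a sketch that assumes the spatial `store` field from the techproducts example) returns the matching documents as GeoJSON:

[source,bash]
----
# Return results as GeoJSON, using the 'store' location field for the geometry
curl 'http://localhost:8983/solr/techproducts/select?q=*:*&wt=geojson&geojson.field=store&fl=id,name,store'
----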
-[[ResponseWriters-PythonResponseWriter]] == Python Response Writer Solr has an optional Python response format that extends its JSON output in the following ways to allow the response to be safely evaluated by the python interpreter: @@ -225,7 +211,7 @@ Solr has an optional Python response format that extends its JSON output in the * newlines are escaped * null changed to None -[[ResponseWriters-PHPResponseWriterandPHPSerializedResponseWriter]] +[[php-writer]] == PHP Response Writer and PHP Serialized Response Writer Solr has a PHP response format that outputs an array (as PHP code) which can be evaluated. Setting the `wt` parameter to `php` invokes the PHP Response Writer. @@ -250,7 +236,6 @@ $result = unserialize($serializedResult); print_r($result); ---- -[[ResponseWriters-RubyResponseWriter]] == Ruby Response Writer Solr has an optional Ruby response format that extends its JSON output in the following ways to allow the response to be safely evaluated by Ruby's interpreter: @@ -274,14 +259,12 @@ puts 'number of matches = ' + rsp['response']['numFound'].to_s rsp['response']['docs'].each { |doc| puts 'name field = ' + doc['name'\] } ---- -[[ResponseWriters-CSVResponseWriter]] == CSV Response Writer The CSV response writer returns a list of documents in comma-separated values (CSV) format. Other information that would normally be included in a response, such as facet information, is excluded. The CSV response writer supports multi-valued fields, as well as<>, and the output of this CSV format is compatible with Solr's https://wiki.apache.org/solr/UpdateCSV[CSV update format]. -[[ResponseWriters-CSVParameters]] === CSV Parameters These parameters specify the CSV format that will be returned. You can accept the default values or specify your own. @@ -297,7 +280,6 @@ These parameters specify the CSV format that will be returned. You can accept th |csv.null |Defaults to a zero length string. Use this parameter when a document has no value for a particular field. |=== -[[ResponseWriters-Multi-ValuedFieldCSVParameters]] === Multi-Valued Field CSV Parameters These parameters specify how multi-valued fields are encoded. Per-field overrides for these values can be done using `f..csv.separator=|`. @@ -310,8 +292,7 @@ These parameters specify how multi-valued fields are encoded. Per-field override |csv.mv.separator |Defaults to the `csv.separator` value. |=== -[[ResponseWriters-Example]] -=== Example +=== CSV Writer Example `\http://localhost:8983/solr/techproducts/select?q=ipod&fl=id,cat,name,popularity,price,score&wt=csv` returns: @@ -323,19 +304,17 @@ F8V7067-APL-KIT,"electronics,connector",Belkin Mobile Power Cord for iPod w/ Doc MA147LL/A,"electronics,music",Apple 60 GB iPod with Video Playback Black,10,399.0,0.2446348 ---- -[[ResponseWriters-VelocityResponseWriter]] +[[velocity-writer]] == Velocity Response Writer The `VelocityResponseWriter` processes the Solr response and request context through Apache Velocity templating. -See <> section for details. +See the <> section for details. -[[ResponseWriters-SmileResponseWriter]] == Smile Response Writer The Smile format is a JSON-compatible binary format, described in detail here: http://wiki.fasterxml.com/SmileFormat. -[[ResponseWriters-XLSXResponseWriter]] == XLSX Response Writer Use this to get the response as a spreadsheet in the .xlsx (Microsoft Excel) format. It accepts parameters in the form `colwidth.` and `colname.` which helps you customize the column widths and column names. 
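A request of roughly this shape (a sketch; the field names come from the techproducts example) saves the results as a spreadsheet:

[source,bash]
----
# Write the results to an .xlsx file, widening the 'name' column and relabelling 'price'
curl -o results.xlsx 'http://localhost:8983/solr/techproducts/select?q=ipod&fl=id,name,price&wt=xlsx&colwidth.name=40&colname.price=Price'
----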
diff --git a/solr/solr-ref-guide/src/result-clustering.adoc b/solr/solr-ref-guide/src/result-clustering.adoc index db9a43ce608..c9bdf632b24 100644 --- a/solr/solr-ref-guide/src/result-clustering.adoc +++ b/solr/solr-ref-guide/src/result-clustering.adoc @@ -28,8 +28,7 @@ image::images/result-clustering/carrot2.png[image,width=900] The query issued to the system was _Solr_. It seems clear that faceting could not yield a similar set of groups, although the goals of both techniques are similar—to let the user explore the set of search results and either rephrase the query or narrow the focus to a subset of current documents. Clustering is also similar to <> in that it can help to look deeper into search results, beyond the top few hits. -[[ResultClustering-PreliminaryConcepts]] -== Preliminary Concepts +== Clustering Concepts Each *document* passed to the clustering component is composed of several logical parts: @@ -39,12 +38,11 @@ Each *document* passed to the clustering component is composed of several logica * the main content, * a language code of the title and content. -The identifier part is mandatory, everything else is optional but at least one of the text fields (title or content) will be required to make the clustering process reasonable. It is important to remember that logical document parts must be mapped to a particular schema and its fields. The content (text) for clustering can be sourced from either a stored text field or context-filtered using a highlighter, all these options are explained below in the <> section. +The identifier part is mandatory, everything else is optional but at least one of the text fields (title or content) will be required to make the clustering process reasonable. It is important to remember that logical document parts must be mapped to a particular schema and its fields. The content (text) for clustering can be sourced from either a stored text field or context-filtered using a highlighter, all these options are explained below in the <> section. A *clustering algorithm* is the actual logic (implementation) that discovers relationships among the documents in the search result and forms human-readable cluster labels. Depending on the choice of the algorithm the clusters may (and probably will) vary. Solr comes with several algorithms implemented in the open source http://carrot2.org[Carrot2] project, commercial alternatives also exist. -[[ResultClustering-QuickStartExample]] -== Quick Start Example +== Clustering Quick Start Example The "```techproducts```" example included with Solr is pre-configured with all the necessary components for result clustering -- but they are disabled by default. @@ -137,16 +135,13 @@ There were a few clusters discovered for this query (`\*:*`), separating search Depending on the quality of input documents, some clusters may not make much sense. Some documents may be left out and not be clustered at all; these will be assigned to the synthetic _Other Topics_ group, marked with the `other-topics` property set to `true` (see the XML dump above for an example). The score of the other topics group is zero. -[[ResultClustering-Installation]] -== Installation +== Installing the Clustering Contrib The clustering contrib extension requires `dist/solr-clustering-*.jar` and all JARs under `contrib/clustering/lib`. 
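With those JARs on the classpath, the techproducts example can be started with clustering switched on and queried through its preconfigured `/clustering` handler; a minimal sketch:

[source,bash]
----
# Start the techproducts example with the clustering component enabled
bin/solr start -e techproducts -Dsolr.clustering.enabled=true

# Query the preconfigured clustering request handler
curl 'http://localhost:8983/solr/techproducts/clustering?q=*:*&rows=100'
----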
-[[ResultClustering-Configuration]] -== Configuration +== Clustering Configuration -[[ResultClustering-DeclarationoftheSearchComponentandRequestHandler]] -=== Declaration of the Search Component and Request Handler +=== Declaration of the Clustering Search Component and Request Handler Clustering extension is a search component and must be declared in `solrconfig.xml`. Such a component can be then appended to a request handler as the last component in the chain (because it requires search results which must be previously fetched by the search component). @@ -205,8 +200,6 @@ An example configuration could look as shown below. ---- - -[[ResultClustering-ConfigurationParametersoftheClusteringComponent]] === Configuration Parameters of the Clustering Component The following parameters of each clustering engine or the entire clustering component (depending where they are declared) are available. @@ -237,7 +230,6 @@ If `true` and the algorithm supports hierarchical clustering, sub-clusters will `carrot.numDescriptions`:: Maximum number of per-cluster labels to return (if the algorithm assigns more than one label to a cluster). - The `carrot.algorithm` parameter should contain a fully qualified class name of an algorithm supported by the http://project.carrot2.org[Carrot2] framework. Currently, the following algorithms are available: * `org.carrot2.clustering.lingo.LingoClusteringAlgorithm` (open source) @@ -253,7 +245,6 @@ For a comparison of characteristics of these algorithms see the following links: The question of which algorithm to choose depends on the amount of traffic (STC is faster than Lingo, but arguably produces less intuitive clusters, Lingo3G is the fastest algorithm but is not free or open source), expected result (Lingo3G provides hierarchical clusters, Lingo and STC provide flat clusters), and the input data (each algorithm will cluster the input slightly differently). There is no one answer which algorithm is "the best". -[[ResultClustering-ContextualandFullFieldClustering]] === Contextual and Full Field Clustering The clustering engine can apply clustering to the full content of (stored) fields or it can run an internal highlighter pass to extract context-snippets before clustering. Highlighting is recommended when the logical snippet field contains a lot of content (this would affect clustering performance). Highlighting can also increase the quality of clustering because the content passed to the algorithm will be more focused around the query (it will be query-specific context). The following parameters control the internal highlighter. @@ -266,10 +257,9 @@ The size, in characters, of the snippets (aka fragments) created by the highligh `carrot.summarySnippets`:: The number of summary snippets to generate for clustering. If not specified, the default highlighting snippet count (`hl.snippets`) will be used. -[[ResultClustering-LogicaltoDocumentFieldMapping]] === Logical to Document Field Mapping -As already mentioned in <>, the clustering component clusters "documents" consisting of logical parts that need to be mapped onto physical schema of data stored in Solr. The field mapping attributes provide a connection between fields and logical document parts. Note that the content of title and snippet fields must be *stored* so that it can be retrieved at search time. +As already mentioned in <>, the clustering component clusters "documents" consisting of logical parts that need to be mapped onto physical schema of data stored in Solr. 
The field mapping attributes provide a connection between fields and logical document parts. Note that the content of title and snippet fields must be *stored* so that it can be retrieved at search time. `carrot.title`:: The field (alternatively comma- or space-separated list of fields) that should be mapped to the logical document's title. The clustering algorithms typically give more weight to the content of the title field compared to the content (snippet). For best results, the field should contain concise, noise-free content. If there is no clear title in your data, you can leave this parameter blank. @@ -280,7 +270,6 @@ The field (alternatively comma- or space-separated list of fields) that should b `carrot.url`:: The field that should be mapped to the logical document's content URL. Leave blank if not required. -[[ResultClustering-ClusteringMultilingualContent]] === Clustering Multilingual Content The field mapping specification can include a `carrot.lang` parameter, which defines the field that stores http://www.loc.gov/standards/iso639-2/php/code_list.php[ISO 639-1] code of the language in which the title and content of the document are written. This information can be stored in the index based on apriori knowledge of the documents' source or a language detection filter applied at indexing time. All algorithms inside the Carrot2 framework will accept ISO codes of languages defined in https://github.com/carrot2/carrot2/blob/master/core/carrot2-core/src/org/carrot2/core/LanguageCode.java[LanguageCode enum]. @@ -295,15 +284,13 @@ A mapping of arbitrary strings into ISO 639 two-letter codes used by `carrot.lan The default language can also be set using Carrot2-specific algorithm attributes (in this case the http://doc.carrot2.org/#section.attribute.lingo.MultilingualClustering.defaultLanguage[MultilingualClustering.defaultLanguage] attribute). -[[ResultClustering-TweakingAlgorithmSettings]] == Tweaking Algorithm Settings The algorithms that come with Solr are using their default settings which may be inadequate for all data sets. All algorithms have lexical resources and resources (stop words, stemmers, parameters) that may require tweaking to get better clusters (and cluster labels). For Carrot2-based algorithms it is probably best to refer to a dedicated tuning application called Carrot2 Workbench (screenshot below). From this application one can export a set of algorithm attributes as an XML file, which can be then placed under the location pointed to by `carrot.resourcesDir`. image::images/result-clustering/carrot2-workbench.png[image,scaledwidth=75.0%] -[[ResultClustering-ProvidingDefaults]] -=== Providing Defaults +=== Providing Defaults for Clustering The default attributes for all engines (algorithms) declared in the clustering component are placed under `carrot.resourcesDir` and with an expected file name of `engineName-attributes.xml`. So for an engine named `lingo` and the default value of `carrot.resourcesDir`, the attributes would be read from a file in `conf/clustering/carrot2/lingo-attributes.xml`. @@ -323,8 +310,7 @@ An example XML file changing the default language of documents to Polish is show ---- -[[ResultClustering-TweakingatQuery-Time]] -=== Tweaking at Query-Time +=== Tweaking Algorithms at Query-Time The clustering component and Carrot2 clustering algorithms can accept query-time attribute overrides. Note that certain things (for example lexical resources) can only be initialized once (at startup, via the XML configuration files). 
@@ -332,8 +318,7 @@ An example query that changes the `LingoClusteringAlgorithm.desiredClusterCountB The clustering engine (the algorithm declared in `solrconfig.xml`) can also be changed at runtime by passing `clustering.engine=name` request attribute: http://localhost:8983/solr/techproducts/clustering?q=*:*&rows=100&clustering.engine=kmeans -[[ResultClustering-PerformanceConsiderations]] -== Performance Considerations +== Performance Considerations with Dynamic Clustering Dynamic clustering of search results comes with two major performance penalties: @@ -349,7 +334,6 @@ For simple queries, the clustering time will usually dominate the fetch time. If Some of these techniques are described in _Apache SOLR and Carrot2 integration strategies_ document, available at http://carrot2.github.io/solr-integration-strategies. The topic of improving performance is also included in the Carrot2 manual at http://doc.carrot2.org/#section.advanced-topics.fine-tuning.performance. -[[ResultClustering-AdditionalResources]] == Additional Resources The following resources provide additional information about the clustering component in Solr and its potential applications. diff --git a/solr/solr-ref-guide/src/result-grouping.adoc b/solr/solr-ref-guide/src/result-grouping.adoc index 89b3c339e56..a0bb076debf 100644 --- a/solr/solr-ref-guide/src/result-grouping.adoc +++ b/solr/solr-ref-guide/src/result-grouping.adoc @@ -54,8 +54,7 @@ Object 3 If you ask Solr to group these documents by "product_range", then the total amount of groups is 2, but the facets for ppm are 2 for 62 and 1 for 65. -[[ResultGrouping-RequestParameters]] -== Request Parameters +== Grouping Parameters Result Grouping takes the following request parameters. Any number of these request parameters can be included in a single request: @@ -68,7 +67,7 @@ The name of the field by which to group results. The field must be single-valued `group.func`:: Group based on the unique values of a function query. + -NOTE: This option does not work with <>. +NOTE: This option does not work with <>. `group.query`:: Return a single group of documents that match the given query. @@ -100,7 +99,7 @@ If `true`, the result of the first field grouping command is used as the main re `group.ngroups`:: If `true`, Solr includes the number of groups that have matched the query in the results. The default value is false. + -See below for <> when using sharded indexes +See below for <> when using sharded indexes. `group.truncate`:: If `true`, facet counts are based on the most relevant document of each group matching the query. The default value is `false`. @@ -110,7 +109,7 @@ Determines whether to compute grouped facets for the field facets specified in f + WARNING: There can be a heavy performance cost to this option. + -See below for <> when using sharded indexes. +See below for <> when using sharded indexes. `group.cache.percent`:: Setting this parameter to a number greater than 0 enables caching for result grouping. Result Grouping executes two searches; this option caches the second search. The default value is `0`. The maximum value is `100`. @@ -119,12 +118,10 @@ Testing has shown that group caching only improves search time with Boolean, wil Any number of group commands (e.g., `group.field`, `group.func`, `group.query`, etc.) may be specified in a single request. -[[ResultGrouping-Examples]] -== Examples +== Grouping Examples All of the following sample queries work with Solr's "`bin/solr -e techproducts`" example. 
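Because any number of group commands can be combined in a single request, one call can group by a field and by a query at the same time; a hedged sketch (the dedicated field and query examples follow below):

[source,bash]
----
# Combine a field grouping and a query grouping in one request (-g stops curl from globbing the [] range)
curl -g 'http://localhost:8983/solr/techproducts/select?q=memory&fl=id,name,price&group=true&group.field=manu_exact&group.query=price:[0%20TO%2099.99]&group.limit=2'
----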
-[[ResultGrouping-GroupingResultsbyField]] === Grouping Results by Field In this example, we will group results based on the `manu_exact` field, which specifies the manufacturer of the items in the sample dataset. @@ -217,7 +214,6 @@ We can run the same query with the request parameter `group.main=true`. This wil } ---- -[[ResultGrouping-GroupingbyQuery]] === Grouping by Query In this example, we will use the `group.query` parameter to find the top three results for "memory" in two different price ranges: 0.00 to 99.99, and over 100. @@ -267,7 +263,6 @@ In this example, we will use the `group.query` parameter to find the top three r In this case, Solr found five matches for "memory," but only returns four results grouped by price. This is because one result for "memory" did not have a price assigned to it. -[[ResultGrouping-DistributedResultGroupingCaveats]] == Distributed Result Grouping Caveats Grouping is supported for <>, with some caveats: diff --git a/solr/solr-ref-guide/src/rule-based-authorization-plugin.adoc b/solr/solr-ref-guide/src/rule-based-authorization-plugin.adoc index ee2fd88203e..3b84dc64be6 100644 --- a/solr/solr-ref-guide/src/rule-based-authorization-plugin.adoc +++ b/solr/solr-ref-guide/src/rule-based-authorization-plugin.adoc @@ -26,10 +26,9 @@ The roles can be used with any of the authentication plugins or with a custom au Once defined through the API, roles are stored in `security.json`. -[[Rule-BasedAuthorizationPlugin-EnabletheAuthorizationPlugin]] == Enable the Authorization Plugin -The plugin must be enabled in `security.json`. This file and where to put it in your system is described in detail in the section <>. +The plugin must be enabled in `security.json`. This file and where to put it in your system is described in detail in the section <>. This file has two parts, the `authentication` part and the `authorization` part. The `authentication` part stores information about the class being used for authentication. @@ -61,14 +60,12 @@ There are several things defined in this example: * The 'admin' role has been defined, and it has permission to edit security settings. * The 'solr' user has been defined to the 'admin' role. -[[Rule-BasedAuthorizationPlugin-PermissionAttributes]] == Permission Attributes Each role is comprised of one or more permissions which define what the user is allowed to do. Each permission is made up of several attributes that define the allowed activity. There are some pre-defined permissions which cannot be modified. The permissions are consulted in order they appear in `security.json`. The first permission that matches is applied for each user, so the strictest permissions should be at the top of the list. Permissions order can be controlled with a parameter of the Authorization API, as described below. -[[Rule-BasedAuthorizationPlugin-PredefinedPermissions]] === Predefined Permissions There are several permissions that are pre-defined. These have fixed default values, which cannot be modified, and new attributes cannot be added. To use these attributes, simply define a role that includes this permission, and then assign a user to that role. @@ -107,19 +104,16 @@ The pre-defined permissions are: ** OVERSEERSTATUS ** CLUSTERSTATUS ** REQUESTSTATUS -* *update*: this permission is allowed to perform any update action on any collection. This includes sending documents for indexing (using an <>). This applies to all collections by default (`collection:"*"`). -* *read*: this permission is allowed to perform any read action on any collection. 
This includes querying using search handlers (using <>) such as `/select`, `/get`, `/browse`, `/tvrh`, `/terms`, `/clustering`, `/elevate`, `/export`, `/spell`, `/clustering`, and `/sql`. This applies to all collections by default ( `collection:"*"` ). +* *update*: this permission is allowed to perform any update action on any collection. This includes sending documents for indexing (using an <>). This applies to all collections by default (`collection:"*"`). +* *read*: this permission is allowed to perform any read action on any collection. This includes querying using search handlers (using <>) such as `/select`, `/get`, `/browse`, `/tvrh`, `/terms`, `/clustering`, `/elevate`, `/export`, `/spell`, `/clustering`, and `/sql`. This applies to all collections by default ( `collection:"*"` ). * *all*: Any requests coming to Solr. -[[Rule-BasedAuthorizationPlugin-AuthorizationAPI]] == Authorization API -[[Rule-BasedAuthorizationPlugin-APIEndpoint]] -=== API Endpoint +=== Authorization API Endpoint `/admin/authorization`: takes a set of commands to create permissions, map permissions to roles, and map roles to users. -[[Rule-BasedAuthorizationPlugin-ManagePermissions]] === Manage Permissions Three commands control managing permissions: @@ -195,7 +189,6 @@ curl --user solr:SolrRocks -H 'Content-type:application/json' -d '{ "set-permission": {"name": "read", "role":"guest"} }' http://localhost:8983/solr/admin/authorization -[[Rule-BasedAuthorizationPlugin-UpdateorDeletePermissions]] === Update or Delete Permissions Permissions can be accessed using their index in the list. Use the `/admin/authorization` API to see the existing permissions and their indices. @@ -216,7 +209,6 @@ curl --user solr:SolrRocks -H 'Content-type:application/json' -d '{ }' http://localhost:8983/solr/admin/authorization -[[Rule-BasedAuthorizationPlugin-MapRolestoUsers]] === Map Roles to Users A single command allows roles to be mapped to users: diff --git a/solr/solr-ref-guide/src/rule-based-replica-placement.adoc b/solr/solr-ref-guide/src/rule-based-replica-placement.adoc index 30e15eb4d3e..2464606e052 100644 --- a/solr/solr-ref-guide/src/rule-based-replica-placement.adoc +++ b/solr/solr-ref-guide/src/rule-based-replica-placement.adoc @@ -31,7 +31,6 @@ This feature is used in the following instances: * Replica creation * Shard splitting -[[Rule-basedReplicaPlacement-CommonUseCases]] == Common Use Cases There are several situations where this functionality may be used. A few of the rules that could be implemented are listed below: @@ -43,7 +42,6 @@ There are several situations where this functionality may be used. A few of the * Assign replica in nodes hosting less than 5 cores. * Assign replicas in nodes hosting the least number of cores. -[[Rule-basedReplicaPlacement-RuleConditions]] == Rule Conditions A rule is a set of conditions that a node must satisfy before a replica core can be created there. @@ -52,9 +50,8 @@ There are three possible conditions. * *shard*: this is the name of a shard or a wild card (* means for all shards). If shard is not specified, then the rule applies to the entire collection. * *replica*: this can be a number or a wild-card (* means any number zero to infinity). -* *tag*: this is an attribute of a node in the cluster that can be used in a rule, e.g., “freedisk”, “cores”, “rack”, “dc”, etc. The tag name can be a custom string. If creating a custom tag, a snitch is responsible for providing tags and values. 
The section <> below describes how to add a custom tag, and defines six pre-defined tags (cores, freedisk, host, port, node, and sysprop). +* *tag*: this is an attribute of a node in the cluster that can be used in a rule, e.g., “freedisk”, “cores”, “rack”, “dc”, etc. The tag name can be a custom string. If creating a custom tag, a snitch is responsible for providing tags and values. The section <> below describes how to add a custom tag, and defines six pre-defined tags (cores, freedisk, host, port, node, and sysprop). -[[Rule-basedReplicaPlacement-RuleOperators]] === Rule Operators A condition can have one of the following operators to set the parameters for the rule. @@ -64,25 +61,20 @@ A condition can have one of the following operators to set the parameters for th * *less than (<)*: `tag:200~`, Solr will try to assign replicas of this collection on nodes with more than 200GB of free disk space. If that is not possible, the node which has the most free disk space will be chosen instead. -[[Rule-basedReplicaPlacement-ChoosingAmongEquals]] === Choosing Among Equals The nodes are sorted first and the rules are used to sort them. This ensures that even if many nodes match the rules, the best nodes are picked up for node assignment. For example, if there is a rule such as `freedisk:>20`, nodes are sorted first on disk space descending and the node with the most disk space is picked up first. Or, if the rule is `cores:<5`, nodes are sorted with number of cores ascending and the node with the least number of cores is picked up first. -[[Rule-basedReplicaPlacement-Rulesfornewshards]] -== Rules for new shards +== Rules for New Shards -The rules are persisted along with collection state. So, when a new replica is created, the system will assign replicas satisfying the rules. When a new shard is created as a result of using the Collection API's <>, ensure that you have created rules specific for that shard name. Rules can be altered using the <>. However, it is not required to do so if the rules do not specify explicit shard names. For example, a rule such as `shard:shard1,replica:*,ip_3:168:`, will not apply to any new shard created. But, if your rule is `replica:*,ip_3:168`, then it will apply to any new shard created. +The rules are persisted along with collection state. So, when a new replica is created, the system will assign replicas satisfying the rules. When a new shard is created as a result of using the Collection API's <>, ensure that you have created rules specific for that shard name. Rules can be altered using the <>. However, it is not required to do so if the rules do not specify explicit shard names. For example, a rule such as `shard:shard1,replica:*,ip_3:168:`, will not apply to any new shard created. But, if your rule is `replica:*,ip_3:168`, then it will apply to any new shard created. The same is applicable to shard splitting. Shard splitting is treated exactly the same way as shard creation. Even though `shard1_1` and `shard1_2` may be created from `shard1`, the rules treat them as distinct, unrelated shards. -[[Rule-basedReplicaPlacement-Snitches]] == Snitches Tag values come from a plugin called Snitch. If there is a tag named ‘rack’ in a rule, there must be Snitch which provides the value for ‘rack’ for each node in the cluster. A snitch implements the Snitch interface. Solr, by default, provides a default snitch which provides the following tags: @@ -96,7 +88,6 @@ Tag values come from a plugin called Snitch. 
If there is a tag named ‘rack’ * *ip_1, ip_2, ip_3, ip_4*: These are ip fragments for each node. For example, in a host with ip `192.168.1.2`, `ip_1 = 2`, `ip_2 =1`, `ip_3 = 168` and` ip_4 = 192` * *sysprop.{PROPERTY_NAME}*: These are values available from system properties. `sysprop.key` means a value that is passed to the node as `-Dkey=keyValue` during the node startup. It is possible to use rules like `sysprop.key:expectedVal,shard:*` -[[Rule-basedReplicaPlacement-HowSnitchesareConfigured]] === How Snitches are Configured It is possible to use one or more snitches for a set of rules. If the rules only need tags from default snitch it need not be explicitly configured. For example: @@ -114,11 +105,8 @@ snitch=class:fqn.ClassName,key1:val1,key2:val2,key3:val3 . After identifying the Snitches, they provide the tag values for each node in the cluster. . If the value for a tag is not obtained for a given node, it cannot participate in the assignment. -[[Rule-basedReplicaPlacement-Examples]] -== Examples +== Replica Placement Examples - -[[Rule-basedReplicaPlacement-Keeplessthan2replicas_atmost1replica_ofthiscollectiononanynode]] === Keep less than 2 replicas (at most 1 replica) of this collection on any node For this rule, we define the `replica` condition with operators for "less than 2", and use a pre-defined tag named `node` to define nodes with any name. @@ -129,8 +117,6 @@ replica:<2,node:* // this is equivalent to replica:<2,node:*,shard:**. We can omit shard:** because ** is the default value of shard ---- - -[[Rule-basedReplicaPlacement-Foragivenshard_keeplessthan2replicasonanynode]] === For a given shard, keep less than 2 replicas on any node For this rule, we use the `shard` condition to define any shard , the `replica` condition with operators for "less than 2", and finally a pre-defined tag named `node` to define nodes with any name. @@ -140,7 +126,6 @@ For this rule, we use the `shard` condition to define any shard , the `replica` shard:*,replica:<2,node:* ---- -[[Rule-basedReplicaPlacement-Assignallreplicasinshard1torack730]] === Assign all replicas in shard1 to rack 730 This rule limits the `shard` condition to 'shard1', but any number of replicas. We're also referencing a custom tag named `rack`. Before defining this rule, we will need to configure a custom Snitch which provides values for the tag `rack`. @@ -157,7 +142,6 @@ In this case, the default value of `replica` is * (or, all replicas). So, it can shard:shard1,rack:730 ---- -[[Rule-basedReplicaPlacement-Createreplicasinnodeswithlessthan5coresonly]] === Create replicas in nodes with less than 5 cores only This rule uses the `replica` condition to define any number of replicas, but adds a pre-defined tag named `core` and uses operators for "less than 5". @@ -174,7 +158,6 @@ Again, we can simplify this to use the default value for `replica`, like so: cores:<5 ---- -[[Rule-basedReplicaPlacement-Donotcreateanyreplicasinhost192.45.67.3]] === Do not create any replicas in host 192.45.67.3 This rule uses only the pre-defined tag `host` to define an IP address where replicas should not be placed. @@ -184,7 +167,6 @@ This rule uses only the pre-defined tag `host` to define an IP address where rep host:!192.45.67.3 ---- -[[Rule-basedReplicaPlacement-DefiningRules]] == Defining Rules Rules are specified per collection during collection creation as request parameters. 
It is possible to specify multiple ‘rule’ and ‘snitch’ params as in this example: @@ -194,4 +176,4 @@ Rules are specified per collection during collection creation as request paramet snitch=class:EC2Snitch&rule=shard:*,replica:1,dc:dc1&rule=shard:*,replica:<2,dc:dc3 ---- -These rules are persisted in `clusterstate.json` in ZooKeeper and are available throughout the lifetime of the collection. This enables the system to perform any future node allocation without direct user interaction. The rules added during collection creation can be modified later using the <> API. +These rules are persisted in `clusterstate.json` in ZooKeeper and are available throughout the lifetime of the collection. This enables the system to perform any future node allocation without direct user interaction. The rules added during collection creation can be modified later using the <> API. diff --git a/solr/solr-ref-guide/src/running-solr-on-hdfs.adoc b/solr/solr-ref-guide/src/running-solr-on-hdfs.adoc index 9f8e2dc300b..6ca5670a5e1 100644 --- a/solr/solr-ref-guide/src/running-solr-on-hdfs.adoc +++ b/solr/solr-ref-guide/src/running-solr-on-hdfs.adoc @@ -28,13 +28,11 @@ To use HDFS rather than a local filesystem, you must be using Hadoop 2.x and you * Modify `solr.in.sh` (or `solr.in.cmd` on Windows) to pass the JVM arguments automatically when using `bin/solr` without having to set them manually. * Define the properties in `solrconfig.xml`. These configuration changes would need to be repeated for every collection, so is a good option if you only want some of your collections stored in HDFS. -[[RunningSolronHDFS-StartingSolronHDFS]] == Starting Solr on HDFS -[[RunningSolronHDFS-StandaloneSolrInstances]] === Standalone Solr Instances -For standalone Solr instances, there are a few parameters you should be sure to modify before starting Solr. These can be set in `solrconfig.xml`(more on that <>), or passed to the `bin/solr` script at startup. +For standalone Solr instances, there are a few parameters you should be sure to modify before starting Solr. These can be set in `solrconfig.xml`(more on that <>), or passed to the `bin/solr` script at startup. * You need to use an `HdfsDirectoryFactory` and a data dir of the form `hdfs://host:port/path` * You need to specify an UpdateLog location of the form `hdfs://host:port/path` @@ -50,9 +48,8 @@ bin/solr start -Dsolr.directoryFactory=HdfsDirectoryFactory -Dsolr.updatelog=hdfs://host:port/path ---- -This example will start Solr in standalone mode, using the defined JVM properties (explained in more detail <>). +This example will start Solr in standalone mode, using the defined JVM properties (explained in more detail <>). -[[RunningSolronHDFS-SolrCloudInstances]] === SolrCloud Instances In SolrCloud mode, it's best to leave the data and update log directories as the defaults Solr comes with and simply specify the `solr.hdfs.home`. All dynamically created collections will create the appropriate directories automatically under the `solr.hdfs.home` root directory. @@ -70,7 +67,6 @@ bin/solr start -c -Dsolr.directoryFactory=HdfsDirectoryFactory This command starts Solr in SolrCloud mode, using the defined JVM properties. -[[RunningSolronHDFS-Modifyingsolr.in.sh_nix_orsolr.in.cmd_Windows_]] === Modifying solr.in.sh (*nix) or solr.in.cmd (Windows) The examples above assume you will pass JVM arguments as part of the start command every time you use `bin/solr` to start Solr. 
However, `bin/solr` looks for an include file named `solr.in.sh` (`solr.in.cmd` on Windows) to set environment variables. By default, this file is found in the `bin` directory, and you can modify it to permanently add the `HdfsDirectoryFactory` settings and ensure they are used every time Solr is started. @@ -85,7 +81,6 @@ For example, to set JVM arguments to always use HDFS when running in SolrCloud m -Dsolr.hdfs.home=hdfs://host:port/path \ ---- -[[RunningSolronHDFS-TheBlockCache]] == The Block Cache For performance, the HdfsDirectoryFactory uses a Directory that will cache HDFS blocks. This caching mechanism is meant to replace the standard file system cache that Solr utilizes so much. By default, this cache is allocated off heap. This cache will often need to be quite large and you may need to raise the off heap memory limit for the specific JVM you are running Solr in. For the Oracle/OpenJDK JVMs, the follow is an example command line parameter that you can use to raise the limit when starting Solr: @@ -95,18 +90,15 @@ For performance, the HdfsDirectoryFactory uses a Directory that will cache HDFS -XX:MaxDirectMemorySize=20g ---- -[[RunningSolronHDFS-HdfsDirectoryFactoryParameters]] == HdfsDirectoryFactory Parameters The `HdfsDirectoryFactory` has a number of settings that are defined as part of the `directoryFactory` configuration. -[[RunningSolronHDFS-SolrHDFSSettings]] === Solr HDFS Settings `solr.hdfs.home`:: A root location in HDFS for Solr to write collection data to. Rather than specifying an HDFS location for the data directory or update log directory, use this to specify one root location and have everything automatically created within this HDFS location. The structure of this parameter is `hdfs://host:port/path/solr`. -[[RunningSolronHDFS-BlockCacheSettings]] === Block Cache Settings `solr.hdfs.blockcache.enabled`:: @@ -124,7 +116,6 @@ Number of memory slabs to allocate. Each slab is 128 MB in size. The default is `solr.hdfs.blockcache.global`:: Enable/Disable using one global cache for all SolrCores. The settings used will be from the first HdfsDirectoryFactory created. The default is `true`. -[[RunningSolronHDFS-NRTCachingDirectorySettings]] === NRTCachingDirectory Settings `solr.hdfs.nrtcachingdirectory.enable`:: true | @@ -136,13 +127,11 @@ NRTCachingDirectory max segment size for merges. The default is `16`. `solr.hdfs.nrtcachingdirectory.maxcachedmb`:: NRTCachingDirectory max cache size. The default is `192`. -[[RunningSolronHDFS-HDFSClientConfigurationSettings]] === HDFS Client Configuration Settings `solr.hdfs.confdir`:: Pass the location of HDFS client configuration files - needed for HDFS HA for example. -[[RunningSolronHDFS-KerberosAuthenticationSettings]] === Kerberos Authentication Settings Hadoop can be configured to use the Kerberos protocol to verify user identity when trying to access core services like HDFS. If your HDFS directories are protected using Kerberos, then you need to configure Solr's HdfsDirectoryFactory to authenticate using Kerberos in order to read and write to HDFS. To enable Kerberos authentication from Solr, you need to set the following parameters: @@ -157,8 +146,7 @@ This file will need to be present on all Solr servers at the same path provided `solr.hdfs.security.kerberos.principal`:: The Kerberos principal that Solr should use to authenticate to secure Hadoop; the format of a typical Kerberos V5 principal is: `primary/instance@realm`. 
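As a rough sketch, the three Kerberos-related settings can be supplied the same way as the other `solr.hdfs.*` properties on this page, as JVM system properties at startup, or added to the `<directoryFactory>` element in the sample `solrconfig.xml` that follows. The property names other than `solr.hdfs.security.kerberos.principal`, the keytab path, and the principal value below are assumptions to adjust for your environment:

[source,bash]
----
# Keytab path and principal are placeholders; adjust for your KDC and realm
bin/solr start -c -Dsolr.directoryFactory=HdfsDirectoryFactory \
     -Dsolr.lock.type=hdfs \
     -Dsolr.hdfs.home=hdfs://host:port/path \
     -Dsolr.hdfs.security.kerberos.enabled=true \
     -Dsolr.hdfs.security.kerberos.keytabfile=/etc/krb5/solr.keytab \
     -Dsolr.hdfs.security.kerberos.principal=solr/admin@EXAMPLE.COM
----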
-[[RunningSolronHDFS-Example]] -== Example +== Example solrconfig.xml for HDFS Here is a sample `solrconfig.xml` configuration for storing Solr indexes on HDFS: @@ -189,7 +177,6 @@ If using Kerberos, you will need to add the three Kerberos related properties to ---- -[[RunningSolronHDFS-AutomaticallyAddReplicasinSolrCloud]] == Automatically Add Replicas in SolrCloud One benefit to running Solr in HDFS is the ability to automatically add new replicas when the Overseer notices that a shard has gone down. Because the "gone" index shards are stored in HDFS, the a new core will be created and the new core will point to the existing indexes in HDFS. @@ -205,7 +192,6 @@ The minimum time (in ms) to wait for initiating replacement of a replica after f `autoReplicaFailoverBadNodeExpiration`:: The delay (in ms) after which a replica marked as down would be unmarked. The default is `60000`. -[[RunningSolronHDFS-TemporarilydisableautoAddReplicasfortheentirecluster]] === Temporarily Disable autoAddReplicas for the Entire Cluster When doing offline maintenance on the cluster and for various other use cases where an admin would like to temporarily disable auto addition of replicas, the following APIs will disable and re-enable autoAddReplicas for *all collections in the cluster*: diff --git a/solr/solr-ref-guide/src/running-solr.adoc b/solr/solr-ref-guide/src/running-solr.adoc index ecc41122577..f18183e16b4 100644 --- a/solr/solr-ref-guide/src/running-solr.adoc +++ b/solr/solr-ref-guide/src/running-solr.adoc @@ -114,7 +114,7 @@ Solr also provides a number of useful examples to help you learn about key featu bin/solr -e techproducts ---- -Currently, the available examples you can run are: techproducts, dih, schemaless, and cloud. See the section <> for details on each example. +Currently, the available examples you can run are: techproducts, dih, schemaless, and cloud. See the section <> for details on each example. .Getting Started with SolrCloud [NOTE] @@ -171,7 +171,7 @@ You may want to add a few sample documents before trying to index your own conte In the `bin/` directory is the post script, a command line tool which can be used to index different types of documents. Do not worry too much about the details for now. The <> section has all the details on indexing. -To see some information about the usage of `bin/post`, use the `-help` option. Windows users, see the section for <>. +To see some information about the usage of `bin/post`, use the `-help` option. Windows users, see the section for <>. `bin/post` can post various types of content to Solr, including files in Solr's native XML and JSON formats, CSV files, a directory tree of rich documents, or even a simple short web crawl. See the examples at the end of `bin/post -help` for various commands to easily get started posting your content into Solr. diff --git a/solr/solr-ref-guide/src/schema-api.adoc b/solr/solr-ref-guide/src/schema-api.adoc index 893936fe3cc..a12eeb5d4fa 100644 --- a/solr/solr-ref-guide/src/schema-api.adoc +++ b/solr/solr-ref-guide/src/schema-api.adoc @@ -52,7 +52,7 @@ The base address for the API is `\http://:/solr/`. bin/solr -e cloud -noprompt ---- -== API Entry Points +== Schema API Entry Points * `/schema`: <> the schema, or <> the schema to add, remove, or replace fields, dynamic fields, copy fields, or field types * `/schema/fields`: <> about all defined fields or a specific named field @@ -408,14 +408,12 @@ The query parameters should be added to the API request after '?'. `wt`:: Defines the format of the response. 
The options are *json*, *xml* or *schema.xml*. If not specified, JSON will be returned by default. -[[SchemaAPI-OUTPUT]] ==== Retrieve Schema Response *Output Content* The output will include all fields, field types, dynamic rules and copy field rules, in the format requested (JSON or XML). The schema name and version are also included. -[[SchemaAPI-EXAMPLES]] ==== Retrieve Schema Examples Get the entire schema in JSON. diff --git a/solr/solr-ref-guide/src/schema-factory-definition-in-solrconfig.adoc b/solr/solr-ref-guide/src/schema-factory-definition-in-solrconfig.adoc index 9d0e60d6705..4f265912191 100644 --- a/solr/solr-ref-guide/src/schema-factory-definition-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/schema-factory-definition-in-solrconfig.adoc @@ -31,7 +31,6 @@ Schemaless mode requires enabling the Managed Schema if it is not already, but f While the "read" features of the Schema API are supported for all schema types, support for making schema modifications programatically depends on the `` in use. -[[SchemaFactoryDefinitioninSolrConfig-SolrUsesManagedSchemabyDefault]] == Solr Uses Managed Schema by Default When a `` is not explicitly declared in a `solrconfig.xml` file, Solr implicitly uses a `ManagedIndexSchemaFactory`, which is by default `"mutable"` and keeps schema information in a `managed-schema` file. @@ -54,7 +53,6 @@ If you wish to explicitly configure `ManagedIndexSchemaFactory` the following op With the default configuration shown above, you can use the <> to modify the schema as much as you want, and then later change the value of `mutable` to *false* if you wish to "lock" the schema in place and prevent future changes. -[[SchemaFactoryDefinitioninSolrConfig-Classicschema.xml]] == Classic schema.xml An alternative to using a managed schema is to explicitly configure a `ClassicIndexSchemaFactory`. `ClassicIndexSchemaFactory` requires the use of a `schema.xml` configuration file, and disallows any programatic changes to the Schema at run time. The `schema.xml` file must be edited manually and is only loaded only when the collection is loaded. @@ -64,7 +62,6 @@ An alternative to using a managed schema is to explicitly configure a `ClassicIn ---- -[[SchemaFactoryDefinitioninSolrConfig-Switchingfromschema.xmltoManagedSchema]] === Switching from schema.xml to Managed Schema If you have an existing Solr collection that uses `ClassicIndexSchemaFactory`, and you wish to convert to use a managed schema, you can simply modify the `solrconfig.xml` to specify the use of the `ManagedIndexSchemaFactory`. @@ -78,7 +75,6 @@ Once Solr is restarted and it detects that a `schema.xml` file exists, but the ` You are now free to use the <> as much as you want to make changes, and remove the `schema.xml.bak`. -[[SchemaFactoryDefinitioninSolrConfig-SwitchingfromManagedSchematoManuallyEditedschema.xml]] === Switching from Managed Schema to Manually Edited schema.xml If you have started Solr with managed schema enabled and you would like to switch to manually editing a `schema.xml` file, you should take the following steps: @@ -89,7 +85,7 @@ If you have started Solr with managed schema enabled and you would like to switc .. Add a `ClassicIndexSchemaFactory` definition as shown above . Reload the core(s). -If you are using SolrCloud, you may need to modify the files via ZooKeeper. The `bin/solr` script provides an easy way to download the files from ZooKeeper and upload them back after edits. See the section <> for more information. 
+If you are using SolrCloud, you may need to modify the files via ZooKeeper. The `bin/solr` script provides an easy way to download the files from ZooKeeper and upload them back after edits. See the section <> for more information. [TIP] ==== diff --git a/solr/solr-ref-guide/src/schemaless-mode.adoc b/solr/solr-ref-guide/src/schemaless-mode.adoc index 30e7d514343..825c294ac6b 100644 --- a/solr/solr-ref-guide/src/schemaless-mode.adoc +++ b/solr/solr-ref-guide/src/schemaless-mode.adoc @@ -26,7 +26,6 @@ These Solr features, all controlled via `solrconfig.xml`, are: . Field value class guessing: Previously unseen fields are run through a cascading set of value-based parsers, which guess the Java class of field values - parsers for Boolean, Integer, Long, Float, Double, and Date are currently available. . Automatic schema field addition, based on field value class(es): Previously unseen fields are added to the schema, based on field value Java classes, which are mapped to schema field types - see <>. -[[SchemalessMode-UsingtheSchemalessExample]] == Using the Schemaless Example The three features of schemaless mode are pre-configured in the `_default` <> in the Solr distribution. To start an example instance of Solr using these configs, run the following command: @@ -67,12 +66,10 @@ You can use the `/schema/fields` <> to co "uniqueKey":true}]} ---- -[[SchemalessMode-ConfiguringSchemalessMode]] == Configuring Schemaless Mode As described above, there are three configuration elements that need to be in place to use Solr in schemaless mode. In the `_default` config set included with Solr these are already configured. If, however, you would like to implement schemaless on your own, you should make the following changes. -[[SchemalessMode-EnableManagedSchema]] === Enable Managed Schema As described in the section <>, Managed Schema support is enabled by default, unless your configuration specifies that `ClassicIndexSchemaFactory` should be used. @@ -87,7 +84,6 @@ You can configure the `ManagedIndexSchemaFactory` (and control the resource file ---- -[[SchemalessMode-DefineanUpdateRequestProcessorChain]] === Define an UpdateRequestProcessorChain The UpdateRequestProcessorChain allows Solr to guess field types, and you can define the default field type classes to use. To start, you should define it as follows (see the javadoc links below for update processor factory documentation): @@ -174,7 +170,6 @@ Javadocs for update processor factories mentioned above: * {solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.html[ParseDateFieldUpdateProcessorFactory] * {solr-javadocs}/solr-core/org/apache/solr/update/processor/AddSchemaFieldsUpdateProcessorFactory.html[AddSchemaFieldsUpdateProcessorFactory] -[[SchemalessMode-MaketheUpdateRequestProcessorChaintheDefaultfortheUpdateRequestHandler]] === Make the UpdateRequestProcessorChain the Default for the UpdateRequestHandler Once the UpdateRequestProcessorChain has been defined, you must instruct your UpdateRequestHandlers to use it when working with index updates (i.e., adding, removing, replacing documents). There are two ways to do this. The update chain shown above has a `default=true` attribute which will use it for any update handler. 
An alternative, more explicit way is to use <> to set the defaults on all `/update` request handlers: @@ -193,7 +188,6 @@ Once the UpdateRequestProcessorChain has been defined, you must instruct your Up After each of these changes have been made, Solr should be restarted (or, you can reload the cores to load the new `solrconfig.xml` definitions). ==== -[[SchemalessMode-ExamplesofIndexedDocuments]] == Examples of Indexed Documents Once the schemaless mode has been enabled (whether you configured it manually or are using `_default`), documents that include fields that are not defined in your schema will be indexed, using the guessed field types which are automatically added to the schema. @@ -243,13 +237,14 @@ The fields now in the schema (output from `curl \http://localhost:8983/solr/gett "name":"Sold", "type":"plongs"}, { - "name":"_root_" ...} + "name":"_root_", ...}, { - "name":"_text_" ...} + "name":"_text_", ...}, { - "name":"_version_" ...} + "name":"_version_", ...}, { - "name":"id" ...} + "name":"id", ...} +]} ---- In addition string versions of the text fields are indexed, using copyFields to a `*_str` dynamic field: (output from `curl \http://localhost:8983/solr/gettingstarted/schema/copyfields` ): @@ -277,7 +272,7 @@ Even if you want to use schemaless mode for most fields, you can still use the < Internally, the Schema API and the Schemaless Update Processors both use the same <> functionality. -Also, if you do not need the `*_str` version of a text field, you can simply remove the `copyField` definition from the auto-generated schema and it will not be re-added since the original field is now defined. +Also, if you do not need the `*_str` version of a text field, you can simply remove the `copyField` definition from the auto-generated schema and it will not be re-added since the original field is now defined. ==== Once a field has been added to the schema, its field type is fixed. As a consequence, adding documents with field value(s) that conflict with the previously guessed field type will fail. For example, after adding the above document, the "```Sold```" field has the fieldType `plongs`, but the document below has a non-integral decimal value in this field: diff --git a/solr/solr-ref-guide/src/segments-info.adoc b/solr/solr-ref-guide/src/segments-info.adoc index c5a4395b89b..b0d72fe318f 100644 --- a/solr/solr-ref-guide/src/segments-info.adoc +++ b/solr/solr-ref-guide/src/segments-info.adoc @@ -22,4 +22,4 @@ The Segments Info screen lets you see a visualization of the various segments in image::images/segments-info/segments_info.png[image,width=486,height=250] -This information may be useful for people to help make decisions about the optimal <> for their data. +This information may be useful for people to help make decisions about the optimal <> for their data. diff --git a/solr/solr-ref-guide/src/setting-up-an-external-zookeeper-ensemble.adoc b/solr/solr-ref-guide/src/setting-up-an-external-zookeeper-ensemble.adoc index ab548362ebd..d82ac29c5e6 100644 --- a/solr/solr-ref-guide/src/setting-up-an-external-zookeeper-ensemble.adoc +++ b/solr/solr-ref-guide/src/setting-up-an-external-zookeeper-ensemble.adoc @@ -40,7 +40,6 @@ For example, if you only have two ZooKeeper nodes and one goes down, 50% of avai More information on ZooKeeper clusters is available from the ZooKeeper documentation at http://zookeeper.apache.org/doc/r3.4.10/zookeeperAdmin.html#sc_zkMulitServerSetup. 
-[[SettingUpanExternalZooKeeperEnsemble-DownloadApacheZooKeeper]] == Download Apache ZooKeeper The first step in setting up Apache ZooKeeper is, of course, to download the software. It's available from http://zookeeper.apache.org/releases.html. @@ -52,15 +51,12 @@ When using stand-alone ZooKeeper, you need to take care to keep your version of Solr currently uses Apache ZooKeeper v3.4.10. ==== -[[SettingUpanExternalZooKeeperEnsemble-SettingUpaSingleZooKeeper]] == Setting Up a Single ZooKeeper -[[SettingUpanExternalZooKeeperEnsemble-Createtheinstance]] -=== Create the instance +=== Create the Instance Creating the instance is a simple matter of extracting the files into a specific target directory. The actual directory itself doesn't matter, as long as you know where it is, and where you'd like to have ZooKeeper store its internal data. -[[SettingUpanExternalZooKeeperEnsemble-Configuretheinstance]] -=== Configure the instance +=== Configure the Instance The next step is to configure your ZooKeeper instance. To do that, create the following file: `/conf/zoo.cfg`. To this file, add the following information: [source,bash] @@ -80,15 +76,13 @@ The parameters are as follows: Once this file is in place, you're ready to start the ZooKeeper instance. -[[SettingUpanExternalZooKeeperEnsemble-Runtheinstance]] -=== Run the instance +=== Run the Instance To run the instance, you can simply use the `ZOOKEEPER_HOME/bin/zkServer.sh` script provided, as with this command: `zkServer.sh start` Again, ZooKeeper provides a great deal of power through additional configurations, but delving into them is beyond the scope of this tutorial. For more information, see the ZooKeeper http://zookeeper.apache.org/doc/r3.4.5/zookeeperStarted.html[Getting Started] page. For this example, however, the defaults are fine. -[[SettingUpanExternalZooKeeperEnsemble-PointSolrattheinstance]] -=== Point Solr at the instance +=== Point Solr at the Instance Pointing Solr at the ZooKeeper instance you've created is a simple matter of using the `-z` parameter when using the bin/solr script. For example, in order to point the Solr instance to the ZooKeeper you've started on port 2181, this is what you'd need to do: @@ -108,12 +102,10 @@ bin/solr start -cloud -s -p 8987 -z localhost:2 NOTE: When you are not using an example to start solr, make sure you upload the configuration set to ZooKeeper before creating the collection. -[[SettingUpanExternalZooKeeperEnsemble-ShutdownZooKeeper]] -=== Shut down ZooKeeper +=== Shut Down ZooKeeper To shut down ZooKeeper, use the zkServer script with the "stop" command: `zkServer.sh stop`. -[[SettingUpanExternalZooKeeperEnsemble-SettingupaZooKeeperEnsemble]] == Setting up a ZooKeeper Ensemble With an external ZooKeeper ensemble, you need to set things up just a little more carefully as compared to the Getting Started example. @@ -188,8 +180,7 @@ Once these servers are running, you can reference them from Solr just as you did bin/solr start -e cloud -z localhost:2181,localhost:2182,localhost:2183 -noprompt ---- -[[SettingUpanExternalZooKeeperEnsemble-SecuringtheZooKeeperconnection]] -== Securing the ZooKeeper connection +== Securing the ZooKeeper Connection You may also want to secure the communication between ZooKeeper and Solr. 
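One possible approach, sketched here only (the ACL and credentials provider classes and the `zkDigest*` system properties come from Solr's ZooKeeper access control support, and the usernames and passwords are placeholders), is to add digest credentials and ACL providers to `solr.in.sh` so that every Solr node connects to ZooKeeper with protected ACLs:

[source,bash]
----
# Sketch: digest-based ZooKeeper ACLs set in solr.in.sh (credential values are placeholders)
SOLR_ZK_CREDS_AND_ACLS="-DzkACLProvider=org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider \
  -DzkCredentialsProvider=org.apache.solr.common.cloud.VMParamsSingleSetCredentialsDigestZkCredentialsProvider \
  -DzkDigestUsername=admin-user -DzkDigestPassword=CHANGEME-ADMIN \
  -DzkDigestReadonlyUsername=readonly-user -DzkDigestReadonlyPassword=CHANGEME-READONLY"
SOLR_OPTS="$SOLR_OPTS $SOLR_ZK_CREDS_AND_ACLS"
----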
diff --git a/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc b/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc index d2dbcf7dd65..3d0a87d97dc 100644 --- a/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc +++ b/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc @@ -36,10 +36,9 @@ If a leader goes down, one of the other replicas is automatically elected as the When a document is sent to a Solr node for indexing, the system first determines which Shard that document belongs to, and then which node is currently hosting the leader for that shard. The document is then forwarded to the current leader for indexing, and the leader forwards the update to all of the other replicas. -[[ShardsandIndexingDatainSolrCloud-DocumentRouting]] == Document Routing -Solr offers the ability to specify the router implementation used by a collection by specifying the `router.name` parameter when <>. +Solr offers the ability to specify the router implementation used by a collection by specifying the `router.name` parameter when <>. If you use the (default) "```compositeId```" router, you can send documents with a prefix in the document ID which will be used to calculate the hash Solr uses to determine the shard a document is sent to for indexing. The prefix can be anything you'd like it to be (it doesn't have to be the shard name, for example), but it must be consistent so Solr behaves consistently. For example, if you wanted to co-locate documents for a customer, you could use the customer name or ID as the prefix. If your customer is "IBM", for example, with a document with the ID "12345", you would insert the prefix into the document id field: "IBM!12345". The exclamation mark ('!') is critical here, as it distinguishes the prefix used to determine which shard to direct the document to. @@ -55,16 +54,14 @@ If you do not want to influence how documents are stored, you don't need to spec If you created the collection and defined the "implicit" router at the time of creation, you can additionally define a `router.field` parameter to use a field from each document to identify a shard where the document belongs. If the field specified is missing in the document, however, the document will be rejected. You could also use the `\_route_` parameter to name a specific shard. -[[ShardsandIndexingDatainSolrCloud-ShardSplitting]] == Shard Splitting When you create a collection in SolrCloud, you decide on the initial number shards to be used. But it can be difficult to know in advance the number of shards that you need, particularly when organizational requirements can change at a moment's notice, and the cost of finding out later that you chose wrong can be high, involving creating new cores and re-indexing all of your data. The ability to split shards is in the Collections API. It currently allows splitting a shard into two pieces. The existing shard is left as-is, so the split action effectively makes two copies of the data as new shards. You can delete the old shard at a later time when you're ready. -More details on how to use shard splitting is in the section on the Collection API's <>. +More details on how to use shard splitting is in the section on the Collection API's <>. -[[ShardsandIndexingDatainSolrCloud-IgnoringCommitsfromClientApplicationsinSolrCloud]] == Ignoring Commits from Client Applications in SolrCloud In most cases, when running in SolrCloud mode, indexing client applications should not send explicit commit requests. 
Rather, you should configure auto commits with `openSearcher=false` and auto soft-commits to make recent updates visible in search requests. This ensures that auto commits occur on a regular schedule in the cluster. diff --git a/solr/solr-ref-guide/src/solr-control-script-reference.adoc b/solr/solr-ref-guide/src/solr-control-script-reference.adoc index 45a9e80e969..368aacc2422 100644 --- a/solr/solr-ref-guide/src/solr-control-script-reference.adoc +++ b/solr/solr-ref-guide/src/solr-control-script-reference.adoc @@ -83,7 +83,7 @@ The available options are: * dih * schemaless + -See the section <> below for more details on the example configurations. +See the section <> below for more details on the example configurations. + *Example*: `bin/solr start -e schemaless` @@ -185,7 +185,6 @@ When starting in SolrCloud mode, the interactive script session will prompt you For more information about starting Solr in SolrCloud mode, see also the section <>. -[[SolrControlScriptReference-RunningwithExampleConfigurations]] ==== Running with Example Configurations `bin/solr start -e ` @@ -297,7 +296,6 @@ Solr process 39827 running on port 8865 "collections":"2"}} ---- -[[SolrControlScriptReference-Healthcheck]] === Healthcheck The `healthcheck` command generates a JSON-formatted health report for a collection when running in SolrCloud mode. The health report provides information about the state of every replica for all shards in a collection, including the number of committed documents and its current state. @@ -306,7 +304,6 @@ The `healthcheck` command generates a JSON-formatted health report for a collect `bin/solr healthcheck -help` -[[SolrControlScriptReference-AvailableParameters.2]] ==== Healthcheck Parameters `-c `:: @@ -371,7 +368,6 @@ Below is an example healthcheck request and response using a non-standard ZooKee "leader":true}]}]} ---- -[[SolrControlScriptReference-CollectionsandCores]] == Collections and Cores The `bin/solr` script can also help you create new collections (in SolrCloud mode) or cores (in standalone mode), or delete collections. @@ -566,7 +562,6 @@ If the `-updateIncludeFileOnly` option is set to *true*, then only the settings If the `-updateIncludeFileOnly` option is set to *false*, then the settings in `bin/solr.in.sh` or `bin\solr.in.cmd` will be updated, and `security.json` will be removed. However, the `basicAuth.conf` file is not removed with either option. -[[SolrControlScriptReference-ZooKeeperOperations]] == ZooKeeper Operations The `bin/solr` script allows certain operations affecting ZooKeeper. These operations are for SolrCloud mode only. The operations are available as sub-commands, which each have their own set of options. @@ -577,7 +572,6 @@ The `bin/solr` script allows certain operations affecting ZooKeeper. These opera NOTE: Solr should have been started at least once before issuing these commands to initialize ZooKeeper with the znodes Solr expects. Once ZooKeeper is initialized, Solr doesn't need to be running on any node to use these commands. -[[SolrControlScriptReference-UploadaConfigurationSet]] === Upload a Configuration Set Use the `zk upconfig` command to upload one of the pre-configured configuration set or a customized configuration set to ZooKeeper. @@ -618,10 +612,9 @@ bin/solr zk upconfig -z 111.222.333.444:2181 -n mynewconfig -d /path/to/configse .Reload Collections When Changing Configurations [WARNING] ==== -This command does *not* automatically make changes effective! It simply uploads the configuration sets to ZooKeeper. 
You can use the Collection API's <> to reload any collections that uses this configuration set. +This command does *not* automatically make changes effective! It simply uploads the configuration sets to ZooKeeper. You can use the Collection API's <> to reload any collections that uses this configuration set. ==== -[[SolrControlScriptReference-DownloadaConfigurationSet]] === Download a Configuration Set Use the `zk downconfig` command to download a configuration set from ZooKeeper to the local filesystem. @@ -791,12 +784,10 @@ An example of this command with the parameters is: `bin/solr zk ls /collections` -[[SolrControlScriptReference-Createaznode_supportschroot_]] === Create a znode (supports chroot) Use the `zk mkroot` command to create a znode. The primary use-case for this command to support ZooKeeper's "chroot" concept. However, it can also be used to create arbitrary paths. -[[SolrControlScriptReference-AvailableParameters.9]] ==== Create znode Parameters ``:: diff --git a/solr/solr-ref-guide/src/solr-glossary.adoc b/solr/solr-ref-guide/src/solr-glossary.adoc index 1feed2f3710..de27081fa74 100644 --- a/solr/solr-ref-guide/src/solr-glossary.adoc +++ b/solr/solr-ref-guide/src/solr-glossary.adoc @@ -33,7 +33,7 @@ Where possible, terms are linked to relevant parts of the Solr Reference Guide f [[SolrGlossary-A]] === A -[[atomicupdates]]<>:: +[[atomicupdates]]<>:: An approach to updating only one or more fields of a document, instead of reindexing the entire document. @@ -120,7 +120,7 @@ A JVM instance running Solr. Also known as a Solr server. [[SolrGlossary-O]] === O -[[optimisticconcurrency]]<>:: +[[optimisticconcurrency]]<>:: Also known as "optimistic locking", this is an approach that allows for updates to documents currently in the index while retaining locking or version control. [[overseer]]Overseer:: diff --git a/solr/solr-ref-guide/src/solr-jdbc-apache-zeppelin.adoc b/solr/solr-ref-guide/src/solr-jdbc-apache-zeppelin.adoc index 45877f2188c..82e92a87dd2 100644 --- a/solr/solr-ref-guide/src/solr-jdbc-apache-zeppelin.adoc +++ b/solr/solr-ref-guide/src/solr-jdbc-apache-zeppelin.adoc @@ -24,7 +24,6 @@ IMPORTANT: This requires Apache Zeppelin 0.6.0 or greater which contains the JDB To use http://zeppelin.apache.org[Apache Zeppelin] with Solr, you will need to create a JDBC interpreter for Solr. This will add SolrJ to the interpreter classpath. Once the interpreter has been created, you can create a notebook to issue queries. The http://zeppelin.apache.org/docs/latest/interpreter/jdbc.html[Apache Zeppelin JDBC interpreter documentation] provides additional information about JDBC prefixes and other features. -[[SolrJDBC-ApacheZeppelin-CreatetheApacheSolrJDBCInterpreter]] == Create the Apache Solr JDBC Interpreter .Click "Interpreter" in the top navigation @@ -41,7 +40,6 @@ image::images/solr-jdbc-apache-zeppelin/zeppelin_solrjdbc_3.png[image,height=400 For most installations, Apache Zeppelin configures PostgreSQL as the JDBC interpreter default driver. The default driver can either be replaced by the Solr driver as outlined above or you can add a separate JDBC interpreter prefix as outlined in the http://zeppelin.apache.org/docs/latest/interpreter/jdbc.html[Apache Zeppelin JDBC interpreter documentation]. 
==== -[[SolrJDBC-ApacheZeppelin-CreateaNotebook]] == Create a Notebook .Click Notebook \-> Create new note @@ -50,7 +48,6 @@ image::images/solr-jdbc-apache-zeppelin/zeppelin_solrjdbc_4.png[image,width=517, .Provide a name and click "Create Note" image::images/solr-jdbc-apache-zeppelin/zeppelin_solrjdbc_5.png[image,width=839,height=400] -[[SolrJDBC-ApacheZeppelin-QuerywiththeNotebook]] == Query with the Notebook [IMPORTANT] diff --git a/solr/solr-ref-guide/src/solr-jdbc-dbvisualizer.adoc b/solr/solr-ref-guide/src/solr-jdbc-dbvisualizer.adoc index f3ecc867896..8b9b2b2b5e1 100644 --- a/solr/solr-ref-guide/src/solr-jdbc-dbvisualizer.adoc +++ b/solr/solr-ref-guide/src/solr-jdbc-dbvisualizer.adoc @@ -27,10 +27,8 @@ For https://www.dbvis.com/[DbVisualizer], you will need to create a new driver f Once the driver has been created, you can create a connection to Solr with the connection string format outlined in the generic section and use the SQL Commander to issue queries. -[[SolrJDBC-DbVisualizer-SetupDriver]] == Setup Driver -[[SolrJDBC-DbVisualizer-OpenDriverManager]] === Open Driver Manager From the Tools menu, choose Driver Manager to add a driver. @@ -38,21 +36,18 @@ From the Tools menu, choose Driver Manager to add a driver. image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_1.png[image,width=673,height=400] -[[SolrJDBC-DbVisualizer-CreateaNewDriver]] === Create a New Driver image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_2.png[image,width=532,height=400] -[[SolrJDBC-DbVisualizer-NametheDriver]] -=== Name the Driver +=== Name the Driver in Driver Manager Provide a name for the driver, and provide the URL format: `jdbc:solr:///?collection=`. Do not fill in values for the variables "```zk_connection_string```" and "```collection```", those will be provided later when the connection to Solr is configured. The Driver Class will also be automatically added when the driver .jars are added. image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_3.png[image,width=532,height=400] -[[SolrJDBC-DbVisualizer-AddDriverFilestoClasspath]] === Add Driver Files to Classpath The driver files to be added are: @@ -75,17 +70,14 @@ image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_7.png[image,width=655 image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_9.png[image,width=651,height=400] -[[SolrJDBC-DbVisualizer-ReviewandCloseDriverManager]] === Review and Close Driver Manager Once the driver files have been added, you can close the Driver Manager. -[[SolrJDBC-DbVisualizer-CreateaConnection]] == Create a Connection Next, create a connection to Solr using the driver just created. -[[SolrJDBC-DbVisualizer-UsetheConnectionWizard]] === Use the Connection Wizard image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_11.png[image,width=763,height=400] @@ -94,19 +86,16 @@ image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_11.png[image,width=76 image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_12.png[image,width=807,height=400] -[[SolrJDBC-DbVisualizer-NametheConnection]] === Name the Connection image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_13.png[image,width=402,height=400] -[[SolrJDBC-DbVisualizer-SelecttheSolrdriver]] === Select the Solr driver image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_14.png[image,width=399,height=400] -[[SolrJDBC-DbVisualizer-SpecifytheSolrURL]] === Specify the Solr URL Provide the Solr URL, using the ZooKeeper host and port and the collection. 
For example, `jdbc:solr://localhost:9983?collection=test` @@ -114,7 +103,6 @@ Provide the Solr URL, using the ZooKeeper host and port and the collection. For image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_15.png[image,width=401,height=400] -[[SolrJDBC-DbVisualizer-OpenandConnecttoSolr]] == Open and Connect to Solr Once the connection has been created, double-click on it to open the connection details screen and connect to Solr. @@ -125,7 +113,6 @@ image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_16.png[image,width=62 image::images/solr-jdbc-dbvisualizer/dbvisualizer_solrjdbc_17.png[image,width=592,height=400] -[[SolrJDBC-DbVisualizer-OpenSQLCommandertoEnterQueries]] == Open SQL Commander to Enter Queries When the connection is established, you can use the SQL Commander to issue queries and view data. diff --git a/solr/solr-ref-guide/src/spatial-search.adoc b/solr/solr-ref-guide/src/spatial-search.adoc index 8b56c022f23..64d813fd107 100644 --- a/solr/solr-ref-guide/src/spatial-search.adoc +++ b/solr/solr-ref-guide/src/spatial-search.adoc @@ -42,7 +42,6 @@ There are four main field types available for spatial search: Some esoteric details that are not in this guide can be found at http://wiki.apache.org/solr/SpatialSearch. -[[SpatialSearch-LatLonPointSpatialField]] == LatLonPointSpatialField Here's how `LatLonPointSpatialField` (LLPSF) should usually be configured in the schema: @@ -52,7 +51,6 @@ Here's how `LatLonPointSpatialField` (LLPSF) should usually be configured in the LLPSF supports toggling `indexed`, `stored`, `docValues`, and `multiValued`. LLPSF internally uses a 2-dimensional Lucene "Points" (BDK tree) index when "indexed" is enabled (the default). When "docValues" is enabled, a latitude and longitudes pair are bit-interleaved into 64 bits and put into Lucene DocValues. The accuracy of the docValues data is about a centimeter. -[[SpatialSearch-IndexingPoints]] == Indexing Points For indexing geodetic points (latitude and longitude), supply it in "lat,lon" order (comma separated). @@ -61,7 +59,6 @@ For indexing non-geodetic points, it depends. Use `x y` (a space) if RPT. For Po If you'd rather use a standard industry format, Solr supports WKT and GeoJSON. However it's much bulkier than the raw coordinates for such simple data. (Not supported by the deprecated LatLonType or PointType) -[[SpatialSearch-SearchingwithQueryParsers]] == Searching with Query Parsers There are two spatial Solr "query parsers" for geospatial search: `geofilt` and `bbox`. They take the following parameters: @@ -100,7 +97,6 @@ When used with `BBoxField`, additional options are supported: (Advanced option; not supported by LatLonType (deprecated) or PointType). If you only want the query to score (with the above `score` local parameter), not filter, then set this local parameter to false. -[[SpatialSearch-geofilt]] === geofilt The `geofilt` filter allows you to retrieve results based on the geospatial distance (AKA the "great circle distance") from a given point. Another way of looking at it is that it creates a circular shape filter. For example, to find all documents within five kilometers of a given lat/lon point, you could enter `&q=*:*&fq={!geofilt sfield=store}&pt=45.15,-93.85&d=5`. 
This filter returns all results within a circle of the given radius around the initial point: @@ -108,7 +104,6 @@ The `geofilt` filter allows you to retrieve results based on the geospatial dist image::images/spatial-search/circle.png[5KM radius] -[[SpatialSearch-bbox]] === bbox The `bbox` filter is very similar to `geofilt` except it uses the _bounding box_ of the calculated circle. See the blue box in the diagram below. It takes the same parameters as geofilt. @@ -126,7 +121,6 @@ image::images/spatial-search/bbox.png[Bounding box] When a bounding box includes a pole, the bounding box ends up being a "bounding bowl" (a _spherical cap_) that includes all values north of the lowest latitude of the circle if it touches the north pole (or south of the highest latitude if it touches the south pole). ==== -[[SpatialSearch-Filteringbyanarbitraryrectangle]] === Filtering by an Arbitrary Rectangle Sometimes the spatial search requirement calls for finding everything in a rectangular area, such as the area covered by a map the user is looking at. For this case, geofilt and bbox won't cut it. This is somewhat of a trick, but you can use Solr's range query syntax for this by supplying the lower-left corner as the start of the range and the upper-right corner as the end of the range. @@ -138,7 +132,6 @@ Here's an example: LatLonType (deprecated) does *not* support rectangles that cross the dateline. For RPT and BBoxField, if you are non-geospatial coordinates (`geo="false"`) then you must quote the points due to the space, e.g. `"x y"`. -[[SpatialSearch-Optimizing_CacheorNot]] === Optimizing: Cache or Not It's most common to put a spatial query into an "fq" parameter – a filter query. By default, Solr will cache the query in the filter cache. @@ -149,7 +142,6 @@ If you know the filter query (be it spatial or not) is fairly unique and not lik LLPSF does not support Solr's "PostFilter". -[[SpatialSearch-DistanceSortingorBoosting_FunctionQueries_]] == Distance Sorting or Boosting (Function Queries) There are four distance function queries: @@ -161,7 +153,6 @@ There are four distance function queries: For more information about these function queries, see the section on <>. -[[SpatialSearch-geodist]] === geodist `geodist` is a distance function that takes three optional parameters: `(sfield,latitude,longitude)`. You can use the `geodist` function to sort results by distance or score return results. @@ -170,19 +161,16 @@ For example, to sort your results by ascending distance, enter `...&q=*:*&fq={!g To return the distance as the document score, enter `...&q={!func}geodist()&sfield=store&pt=45.15,-93.85&sort=score+asc`. -[[SpatialSearch-MoreExamples]] -== More Examples +== More Spatial Search Examples Here are a few more useful examples of what you can do with spatial search in Solr. -[[SpatialSearch-UseasaSub-QuerytoExpandSearchResults]] === Use as a Sub-Query to Expand Search Results Here we will query for results in Jacksonville, Florida, or within 50 kilometers of 45.15,-93.85 (near Buffalo, Minnesota): `&q=*:*&fq=(state:"FL" AND city:"Jacksonville") OR {!geofilt}&sfield=store&pt=45.15,-93.85&d=50&sort=geodist()+asc` -[[SpatialSearch-FacetbyDistance]] === Facet by Distance To facet by distance, you can use the Frange query parser: @@ -191,14 +179,12 @@ To facet by distance, you can use the Frange query parser: There are other ways to do it too, like using a \{!geofilt} in each facet.query. 
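For example, the `\{!geofilt}` variant can be sketched as a set of facet queries, one per distance ring, reusing the `store` field and point from the examples above; the `key` local parameter simply labels each bucket:

[source,bash]
----
# Sketch: distance "rings" as separate geofilt facet queries
curl "http://localhost:8983/solr/techproducts/select" -G \
  --data-urlencode "q=*:*" \
  --data-urlencode "sfield=store" \
  --data-urlencode "pt=45.15,-93.85" \
  --data-urlencode "facet=true" \
  --data-urlencode "facet.query={!geofilt d=10 key=within_10km}" \
  --data-urlencode "facet.query={!geofilt d=50 key=within_50km}"
----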
-[[SpatialSearch-BoostNearestResults]] === Boost Nearest Results Using the <> or <>, you can combine spatial search with the boost function to boost the nearest results: `&q.alt=*:*&fq={!geofilt}&sfield=store&pt=45.15,-93.85&d=50&bf=recip(geodist(),2,200,20)&sort=score desc` -[[SpatialSearch-RPT]] == RPT RPT refers to either `SpatialRecursivePrefixTreeFieldType` (aka simply RPT) and an extended version: `RptWithGeometrySpatialField` (aka RPT with Geometry). RPT offers several functional improvements over LatLonPointSpatialField: @@ -215,8 +201,7 @@ RPT _shares_ various features in common with `LatLonPointSpatialField`. Some are * Sort/boost via `geodist` * Well-Known-Text (WKT) shape syntax (required for specifying polygons & other complex shapes), and GeoJSON too. In addition to indexing and searching, this works with the `wt=geojson` (GeoJSON Solr response-writer) and `[geo f=myfield]` (geo Solr document-transformer). -[[SpatialSearch-Schemaconfiguration]] -=== Schema Configuration +=== Schema Configuration for RPT To use RPT, the field type must be registered and configured in `schema.xml`. There are many options for this field type. @@ -266,7 +251,6 @@ A third choice is `packedQuad`, which is generally more efficient than `quad`, p *_And there are others:_* `normWrapLongitude`, `datelineRule`, `validationRule`, `autoIndex`, `allowMultiOverlap`, `precisionModel`. For further info, see notes below about `spatialContextFactory` implementations referenced above, especially the link to the JTS based one. -[[SpatialSearch-JTSandPolygons]] === JTS and Polygons As indicated above, `spatialContextFactory` must be set to `JTS` for polygon support, including multi-polygon. @@ -297,7 +281,6 @@ Inside the parenthesis following the search predicate is the shape definition. T Beyond this Reference Guide and Spatila4j's docs, there are some details that remain at the Solr Wiki at http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4. -[[SpatialSearch-RptWithGeometrySpatialField]] === RptWithGeometrySpatialField The `RptWithGeometrySpatialField` field type is a derivative of `SpatialRecursivePrefixTreeFieldType` that also stores the original geometry internally in Lucene DocValues, which it uses to achieve accurate search. It can also be used for indexed point fields. The Intersects predicate (the default) is particularly fast, since many search results can be returned as an accurate hit without requiring a geometry check. This field type is configured just like RPT except that the default `distErrPct` is 0.15 (higher than 0.025) because the grid squares are purely for performance and not to fundamentally represent the shape. @@ -316,7 +299,6 @@ An optional in-memory cache can be defined in `solrconfig.xml`, which should be When using this field type, you will likely _not_ want to mark the field as stored because it's redundant with the DocValues data and surely larger because of the formatting (be it WKT or GeoJSON). To retrieve the spatial data in search results from DocValues, use the `[geo]` transformer -- <>. -[[SpatialSearch-HeatmapFaceting]] === Heatmap Faceting The RPT field supports generating a 2D grid of facet counts for documents having spatial data in each grid cell. For high-detail grids, this can be used to plot points, and for lesser detail it can be used for heatmap generation. The grid cells are determined at index-time based on RPT's configuration. 
At facet counting time, the indexed cells in the region of interest are traversed and a grid of counters corresponding to each cell are incremented. Solr can return the data in a straight-forward 2D array of integers or in a PNG which compresses better for larger data sets but must be decoded. @@ -365,7 +347,6 @@ The `counts_ints2D` key has a 2D array of integers. The initial outer level is i If `format=png` then the output key is `counts_png`. It's a base-64 encoded string of a 4-byte PNG. The PNG logically holds exactly the same data that the ints2D format does. Note that the alpha channel byte is flipped to make it easier to view the PNG for diagnostic purposes, since otherwise counts would have to exceed 2^24 before it becomes non-opague. Thus counts greater than this value will become opaque. -[[SpatialSearch-BBoxField]] == BBoxField The `BBoxField` field type indexes a single rectangle (bounding box) per document field and supports searching via a bounding box. It supports most spatial search predicates, it has enhanced relevancy modes based on the overlap or area between the search rectangle and the indexed rectangle. It's particularly useful for its relevancy modes. To configure it in the schema, use a configuration like this: diff --git a/solr/solr-ref-guide/src/spell-checking.adoc b/solr/solr-ref-guide/src/spell-checking.adoc index adb784a6e69..b46c8a1e096 100644 --- a/solr/solr-ref-guide/src/spell-checking.adoc +++ b/solr/solr-ref-guide/src/spell-checking.adoc @@ -212,7 +212,7 @@ This parameter turns on SpellCheck suggestions for the request. If *true*, then [[SpellChecking-Thespellcheck.qorqParameter]] === The spellcheck.q or q Parameter -This parameter specifies the query to spellcheck. If `spellcheck.q` is defined, then it is used; otherwise the original input query is used. The `spellcheck.q` parameter is intended to be the original query, minus any extra markup like field names, boosts, and so on. If the `q` parameter is specified, then the `SpellingQueryConverter` class is used to parse it into tokens; otherwise the <> is used. The choice of which one to use is up to the application. Essentially, if you have a spelling "ready" version in your application, then it is probably better to use `spellcheck.q`. Otherwise, if you just want Solr to do the job, use the `q` parameter. +This parameter specifies the query to spellcheck. If `spellcheck.q` is defined, then it is used; otherwise the original input query is used. The `spellcheck.q` parameter is intended to be the original query, minus any extra markup like field names, boosts, and so on. If the `q` parameter is specified, then the `SpellingQueryConverter` class is used to parse it into tokens; otherwise the <> is used. The choice of which one to use is up to the application. Essentially, if you have a spelling "ready" version in your application, then it is probably better to use `spellcheck.q`. Otherwise, if you just want Solr to do the job, use the `q` parameter. [NOTE] ==== diff --git a/solr/solr-ref-guide/src/stream-decorators.adoc b/solr/solr-ref-guide/src/stream-decorators.adoc index e65f18adb2a..4db4a8278c9 100644 --- a/solr/solr-ref-guide/src/stream-decorators.adoc +++ b/solr/solr-ref-guide/src/stream-decorators.adoc @@ -382,7 +382,7 @@ cartesianProduct( } ---- -As you can see in the examples above, the `cartesianProduct` function does support flattening tuples across multiple fields and/or evaluators. 
+As you can see in the examples above, the `cartesianProduct` function does support flattening tuples across multiple fields and/or evaluators. == classify @@ -615,8 +615,6 @@ eval(expr) In the example above the `eval` expression reads the first tuple from the underlying expression. It then compiles and executes the string Streaming Expression in the epxr_s field. - -[[StreamingExpressions-executor]] == executor The `executor` function wraps a stream source that contains streaming expressions, and executes the expressions in parallel. The `executor` function looks for the expression in the `expr_s` field in each tuple. The `executor` function has an internal thread pool that runs tasks that compile and run expressions in parallel on the same worker node. This function can also be parallelized across worker nodes by wrapping it in the <> function to provide parallel execution of expressions across a cluster. @@ -984,7 +982,6 @@ The worker nodes can be from the same collection as the data, or they can be a d * `zkHost`: (Optional) The ZooKeeper connect string where the worker collection resides. * `sort`: The sort criteria for ordering tuples returned by the worker nodes. -[[StreamingExpressions-Syntax.25]] === parallel Syntax [source,text] ---- @@ -1000,10 +997,9 @@ The worker nodes can be from the same collection as the data, or they can be a d The expression above shows a `parallel` function wrapping a `reduce` function. This will cause the `reduce` function to be run in parallel across 20 worker nodes. -[[StreamingExpressions-priority]] == priority -The `priority` function is a simple priority scheduler for the <> function. The executor function doesn't directly have a concept of task prioritization; instead it simply executes tasks in the order that they are read from it's underlying stream. The `priority` function provides the ability to schedule a higher priority task ahead of lower priority tasks that were submitted earlier. +The `priority` function is a simple priority scheduler for the <> function. The `executor` function doesn't directly have a concept of task prioritization; instead it simply executes tasks in the order that they are read from its underlying stream. The `priority` function provides the ability to schedule a higher priority task ahead of lower priority tasks that were submitted earlier. The `priority` function wraps two <> that are both emitting tuples that contain streaming expressions to execute. The first topic is considered the higher priority task queue. @@ -1011,14 +1007,12 @@ Each time the `priority` function is called, it checks the higher priority task The `priority` function will only emit a batch of tasks from one of the queues each time it is called. This ensures that no lower priority tasks are executed until the higher priority queue has no tasks to run. -[[StreamingExpressions-Parameters.25]] -=== Parameters +=== priority Parameters * `topic expression`: (Mandatory) the high priority task queue * `topic expression`: (Mandatory) the lower priority task queue -[[StreamingExpressions-Syntax.26]] -=== Syntax +=== priority Syntax [source,text] ---- @@ -1092,7 +1086,7 @@ The example about shows the rollup function wrapping the search function. Notice == scoreNodes -See section in <>. +See section in <>.
== select diff --git a/solr/solr-ref-guide/src/streaming-expressions.adoc b/solr/solr-ref-guide/src/streaming-expressions.adoc index 5ea3dd928dd..1474aaae397 100644 --- a/solr/solr-ref-guide/src/streaming-expressions.adoc +++ b/solr/solr-ref-guide/src/streaming-expressions.adoc @@ -46,7 +46,6 @@ Streams from outside systems can be joined with streams originating from Solr an Both streaming expressions and the streaming API are considered experimental, and the APIs are subject to change. ==== -[[StreamingExpressions-StreamLanguageBasics]] == Stream Language Basics Streaming Expressions are comprised of streaming functions which work with a Solr collection. They emit a stream of tuples (key/value Maps). @@ -55,7 +54,6 @@ Many of the provided streaming functions are designed to work with entire result Some streaming functions act as stream sources to originate the stream flow. Other streaming functions act as stream decorators to wrap other stream functions and perform operations on the stream of tuples. Many streams functions can be parallelized across a worker collection. This can be particularly powerful for relational algebra functions. -[[StreamingExpressions-StreamingRequestsandResponses]] === Streaming Requests and Responses Solr has a `/stream` request handler that takes streaming expression requests and returns the tuples as a JSON stream. This request handler is implicitly defined, meaning there is nothing that has to be defined in `solrconfig.xml` - see <>. @@ -112,7 +110,6 @@ StreamFactory streamFactory = new StreamFactory().withCollectionZkHost("collecti ParallelStream pstream = (ParallelStream)streamFactory.constructStream("parallel(collection1, group(search(collection1, q=\"*:*\", fl=\"id,a_s,a_i,a_f\", sort=\"a_s asc,a_f asc\", partitionKeys=\"a_s\"), by=\"a_s asc\"), workers=\"2\", zkHost=\""+zkHost+"\", sort=\"a_s asc\")"); ---- -[[StreamingExpressions-DataRequirements]] === Data Requirements Because streaming expressions relies on the `/export` handler, many of the field and field type requirements to use `/export` are also requirements for `/stream`, particularly for `sort` and `fl` parameters. Please see the section <> for details. diff --git a/solr/solr-ref-guide/src/suggester.adoc b/solr/solr-ref-guide/src/suggester.adoc index 61bf53d0a20..1950fc7dcab 100644 --- a/solr/solr-ref-guide/src/suggester.adoc +++ b/solr/solr-ref-guide/src/suggester.adoc @@ -32,15 +32,11 @@ The main features of this Suggester are: * Term dictionary pluggability, giving you the flexibility to choose the dictionary implementation * Distributed support -The `solrconfig.xml` found in Solr's "```techproducts```" example has the new Suggester implementation configured already. For more on search components, see the section <>. - -[[Suggester-ConfiguringSuggesterinsolrconfig.xml]] -== Configuring Suggester in solrconfig.xml +The `solrconfig.xml` found in Solr's "```techproducts```" example has a Suggester implementation configured already. For more on search components, see the section <>. The "```techproducts```" example `solrconfig.xml` has a `suggest` search component and a `/suggest` request handler already configured. You can use that as the basis for your configuration, or create it from scratch, as detailed below. -[[Suggester-AddingtheSuggestSearchComponent]] -=== Adding the Suggest Search Component +== Adding the Suggest Search Component The first step is to add a search component to `solrconfig.xml` and tell it to use the SuggestComponent. Here is some sample code that could be used. 
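+A minimal sketch of the general shape, using parameters described in the following sections (the suggester name, source field, and weight field shown here are illustrative choices, not requirements), looks like this:
+
+[source,xml]
+----
+<searchComponent name="suggest" class="solr.SuggestComponent">
+  <lst name="suggester">
+    <str name="name">mySuggester</str>
+    <str name="lookupImpl">FuzzyLookupFactory</str>
+    <str name="dictionaryImpl">DocumentDictionaryFactory</str>
+    <str name="field">cat</str>
+    <str name="weightField">price</str>
+    <str name="suggestAnalyzerFieldType">string</str>
+  </lst>
+</searchComponent>
+----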
@@ -59,25 +55,33 @@ The first step is to add a search component to `solrconfig.xml` and tell it to u ---- -[[Suggester-SuggesterSearchComponentParameters]] -==== Suggester Search Component Parameters +=== Suggester Search Component Parameters -The Suggester search component takes several configuration parameters. The choice of the lookup implementation (`lookupImpl`, how terms are found in the suggestion dictionary) and the dictionary implementation (`dictionaryImpl`, how terms are stored in the suggestion dictionary) will dictate some of the parameters required. Below are the main parameters that can be used no matter what lookup or dictionary implementation is used. In the following sections additional parameters are provided for each implementation. +The Suggester search component takes several configuration parameters. -// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed +The choice of the lookup implementation (`lookupImpl`, how terms are found in the suggestion dictionary) and the dictionary implementation (`dictionaryImpl`, how terms are stored in the suggestion dictionary) will dictate some of the parameters required. -[cols="30,70",options="header"] -|=== -|Parameter |Description -|searchComponent name |Arbitrary name for the search component. -|name |A symbolic name for this suggester. You can refer to this name in the URL parameters and in the SearchHandler configuration. It is possible to have mutiples of these -|lookupImpl |Lookup implementation. There are several possible implementations, described below in the section <>. If not set, the default lookup is JaspellLookupFactory. -|dictionaryImpl |The dictionary implementation to use. There are several possible implementations, described below in the section <> . If not set, the default dictionary implementation is HighFrequencyDictionaryFactory unless a `sourceLocation` is used, in which case, the dictionary implementation will be FileDictionaryFactory -|field a| -A field from the index to use as the basis of suggestion terms. If `sourceLocation` is empty (meaning any dictionary implementation other than FileDictionaryFactory) then terms from this field in the index will be used. +Below are the main parameters that can be used no matter what lookup or dictionary implementation is used. In the following sections additional parameters are provided for each implementation. -To be used as the basis for a suggestion, the field must be stored. You may want to <> to create a special 'suggest' field comprised of terms from other fields in documents. In any event, you likely want a minimal amount of analysis on the field, so an additional option is to create a field type in your schema that only uses basic tokenizers or filters. One option for such a field type is shown here: +`searchComponent name`:: +Arbitrary name for the search component. +`name`:: +A symbolic name for this suggester. You can refer to this name in the URL parameters and in the SearchHandler configuration. It is possible to have multiples of these in one `solrconfig.xml` file. + +`lookupImpl`:: +Lookup implementation. There are several possible implementations, described below in the section <>. If not set, the default lookup is `JaspellLookupFactory`. + +`dictionaryImpl`:: +The dictionary implementation to use. There are several possible implementations, described below in the section <>. ++ +If not set, the default dictionary implementation is `HighFrequencyDictionaryFactory`. 
However, if a `sourceLocation` is used, the dictionary implementation will be `FileDictionaryFactory`. + +`field`:: +A field from the index to use as the basis of suggestion terms. If `sourceLocation` is empty (meaning any dictionary implementation other than `FileDictionaryFactory`), then terms from this field in the index will be used. ++ +To be used as the basis for a suggestion, the field must be stored. You may want to <> to create a special 'suggest' field comprised of terms from other fields in documents. In any event, you very likely want a minimal amount of analysis on the field, so an additional option is to create a field type in your schema that only uses basic tokenizers or filters. One option for such a field type is shown here: ++ [source,xml] ---- @@ -88,154 +92,211 @@ To be used as the basis for a suggestion, the field must be stored. You may want ---- ++ +However, this minimal analysis is not required if you want more analysis to occur on terms. If using the `AnalyzingLookupFactory` as your `lookupImpl`, however, you have the option of defining the field type rules to use for index and query time analysis. -However, this minimal analysis is not required if you want more analysis to occur on terms. If using the AnalyzingLookupFactory as your lookupImpl, however, you have the option of defining the field type rules to use for index and query time analysis. +`sourceLocation`:: +The path to the dictionary file if using the `FileDictionaryFactory`. If this value is empty then the main index will be used as a source of terms and weights. -|sourceLocation |The path to the dictionary file if using the FileDictionaryFactory. If this value is empty then the main index will be used as a source of terms and weights. -|storeDir |The location to store the dictionary file. -|buildOnCommit or buildOnOptimize |If true then the lookup data structure will be rebuilt after soft-commit. If false, the default, then the lookup data will be built only when requested by URL parameter `suggest.build=true`. Use `buildOnCommit` to rebuild the dictionary with every soft-commit, or `buildOnOptimize` to build the dictionary only when the index is optimized. Some lookup implementations may take a long time to build, specially with large indexes, in such cases, using buildOnCommit or buildOnOptimize, particularly with a high frequency of softCommits is not recommended, and it's recommended instead to build the suggester at a lower frequency by manually issuing requests with `suggest.build=true`. -|buildOnStartup |If true then the lookup data structure will be built when Solr starts or when the core is reloaded. If this parameter is not specified, the suggester will check if the lookup data structure is present on disk and build it if not found. Enabling this to true could lead to the core talking longer to load (or reload) as the suggester data structure needs to be built, which can sometimes take a long time. It’s usually preferred to have this setting set to 'false' and build suggesters manually issuing requests with `suggest.build=true`. -|=== +`storeDir`:: +The location to store the dictionary file. -[[Suggester-LookupImplementations]] -==== Lookup Implementations +`buildOnCommit` and `buildOnOptimize`:: +If `true`, the lookup data structure will be rebuilt after soft-commit. If `false`, the default, then the lookup data will be built only when requested by URL parameter `suggest.build=true`. 
Use `buildOnCommit` to rebuild the dictionary with every soft-commit, or `buildOnOptimize` to build the dictionary only when the index is optimized. ++ +Some lookup implementations may take a long time to build, especially with large indexes. In such cases, using `buildOnCommit` or `buildOnOptimize`, particularly with a high frequency of softCommits, is not recommended; instead, build the suggester at a lower frequency by manually issuing requests with `suggest.build=true`. + +`buildOnStartup`:: +If `true`, then the lookup data structure will be built when Solr starts or when the core is reloaded. If this parameter is not specified, the suggester will check if the lookup data structure is present on disk and build it if not found. ++ +Setting this to `true` could lead to the core taking longer to load (or reload) as the suggester data structure needs to be built, which can sometimes take a long time. It’s usually preferred to leave this setting at `false`, the default, and build suggesters by manually issuing requests with `suggest.build=true`. + +=== Lookup Implementations The `lookupImpl` parameter defines the algorithms used to look up terms in the suggest index. There are several possible implementations to choose from, and some require additional parameters to be configured. -[[Suggester-AnalyzingLookupFactory]] -===== AnalyzingLookupFactory +==== AnalyzingLookupFactory A lookup that first analyzes the incoming text and adds the analyzed form to a weighted FST, and then does the same thing at lookup time. This implementation uses the following additional properties: -* suggestAnalyzerFieldType: The field type to use for the query-time and build-time term suggestion analysis. -* exactMatchFirst: If true, the default, exact suggestions are returned first, even if they are prefixes or other strings in the FST have larger weights. -* preserveSep: If true, the default, then a separator between tokens is preserved. This means that suggestions are sensitive to tokenization (e.g., baseball is different from base ball). -* preservePositionIncrements: If true, the suggester will preserve position increments. This means that token filters which leave gaps (for example, when StopFilter matches a stopword) the position would be respected when building the suggester. The default is false. +`suggestAnalyzerFieldType`:: +The field type to use for the query-time and build-time term suggestion analysis. -[[Suggester-FuzzyLookupFactory]] -===== FuzzyLookupFactory +`exactMatchFirst`:: +If `true`, the default, exact suggestions are returned first, even if they are prefixes or other strings in the FST have larger weights. + +`preserveSep`:: +If `true`, the default, then a separator between tokens is preserved. This means that suggestions are sensitive to tokenization (e.g., baseball is different from base ball). + +`preservePositionIncrements`:: +If `true`, the suggester will preserve position increments. This means that token filters which leave gaps (for example, when StopFilter matches a stopword) the position would be respected when building the suggester. The default is `false`. + +==== FuzzyLookupFactory This is a suggester which is an extension of the AnalyzingSuggester but is fuzzy in nature. The similarity is measured by the Levenshtein algorithm. This implementation uses the following additional properties: -* exactMatchFirst: If true, the default, exact suggestions are returned first, even if they are prefixes or other strings in the FST have larger weights.
-* preserveSep: If true, the default, then a separator between tokens is preserved. This means that suggestions are sensitive to tokenization (e.g., baseball is different from base ball). -* maxSurfaceFormsPerAnalyzedForm: Maximum number of surface forms to keep for a single analyzed form. When there are too many surface forms we discard the lowest weighted ones. -* maxGraphExpansions: When building the FST ("index-time"), we add each path through the tokenstream graph as an individual entry. This places an upper-bound on how many expansions will be added for a single suggestion. The default is -1 which means there is no limit. -* preservePositionIncrements: If true, the suggester will preserve position increments. This means that token filters which leave gaps (for example, when StopFilter matches a stopword) the position would be respected when building the suggester. The default is false. -* maxEdits: The maximum number of string edits allowed. The systems hard limit is 2. The default is 1. -* transpositions: If true, the default, transpositions should be treated as a primitive edit operation. -* nonFuzzyPrefix: The length of the common non fuzzy prefix match which must match a suggestion. The default is 1. -* minFuzzyLength: The minimum length of query before which any string edits will be allowed. The default is 3. -* unicodeAware: If true, maxEdits, minFuzzyLength, transpositions and nonFuzzyPrefix parameters will be measured in unicode code points (actual letters) instead of bytes. The default is false. +`exactMatchFirst`:: +If `true`, the default, exact suggestions are returned first, even if they are prefixes or other strings in the FST have larger weights. -[[Suggester-AnalyzingInfixLookupFactory]] -===== AnalyzingInfixLookupFactory +`preserveSep`:: +If `true`, the default, then a separator between tokens is preserved. This means that suggestions are sensitive to tokenization (e.g., baseball is different from base ball). + +`maxSurfaceFormsPerAnalyzedForm`:: +The maximum number of surface forms to keep for a single analyzed form. When there are too many surface forms we discard the lowest weighted ones. + +`maxGraphExpansions`:: +When building the FST ("index-time"), we add each path through the tokenstream graph as an individual entry. This places an upper-bound on how many expansions will be added for a single suggestion. The default is `-1` which means there is no limit. + +`preservePositionIncrements`:: +If `true`, the suggester will preserve position increments. This means that token filters which leave gaps (for example, when StopFilter matches a stopword) the position would be respected when building the suggester. The default is `false`. + +`maxEdits`:: +The maximum number of string edits allowed. The system's hard limit is `2`. The default is `1`. + +`transpositions`:: +If `true`, the default, transpositions should be treated as a primitive edit operation. + +`nonFuzzyPrefix`:: +The length of the common non fuzzy prefix match which must match a suggestion. The default is `1`. + +`minFuzzyLength`:: +The minimum length of query before which any string edits will be allowed. The default is `3`. + +`unicodeAware`:: +If `true`, the `maxEdits`, `minFuzzyLength`, `transpositions` and `nonFuzzyPrefix` parameters will be measured in unicode code points (actual letters) instead of bytes. The default is `false`. + +==== AnalyzingInfixLookupFactory Analyzes the input text and then suggests matches based on prefix matches to any tokens in the indexed text. 
This uses a Lucene index for its dictionary. This implementation uses the following additional properties. -* indexPath: When using AnalyzingInfixSuggester you can provide your own path where the index will get built. The default is analyzingInfixSuggesterIndexDir and will be created in your collections data directory. -* minPrefixChars: Minimum number of leading characters before PrefixQuery is used (default is 4). Prefixes shorter than this are indexed as character ngrams (increasing index size but making lookups faster). -* allTermsRequired: Boolean option for multiple terms. Default is true - all terms required. -* highlight: Highlight suggest terms. Default is true. +`indexPath`:: +When using `AnalyzingInfixSuggester` you can provide your own path where the index will get built. The default is `analyzingInfixSuggesterIndexDir` and will be created in your collection's `data/` directory. -This implementation supports <>. +`minPrefixChars`:: +Minimum number of leading characters before PrefixQuery is used (default is `4`). Prefixes shorter than this are indexed as character ngrams (increasing index size but making lookups faster). -[[Suggester-BlendedInfixLookupFactory]] -===== BlendedInfixLookupFactory +`allTermsRequired`:: +Boolean option for multiple terms. The default is `true`, all terms will be required. -An extension of the AnalyzingInfixSuggester which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa. +`highlight`:: +Highlight suggest terms. Default is `true`. + +This implementation supports <>. + +==== BlendedInfixLookupFactory + +An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa. This implementation uses the following additional properties: -* blenderType: used to calculate weight coefficient using the position of the first matching word. Can be one of: -** position_linear: weightFieldValue*(1 - 0.10*position): Matches to the start will be given a higher score (Default) -** position_reciprocal: weightFieldValue/(1+position): Matches to the end will be given a higher score. -*** exponent: an optional configuration variable for the position_reciprocal blenderType used to control how fast the score will increase or decrease. Default 2.0. -* numFactor: The factor to multiply the number of searched elements from which results will be pruned. Default is 10. -* indexPath: When using BlendedInfixSuggester you can provide your own path where the index will get built. The default directory name is blendedInfixSuggesterIndexDir and will be created in your collections data directory. -* minPrefixChars: Minimum number of leading characters before PrefixQuery is used (default 4). Prefixes shorter than this are indexed as character ngrams (increasing index size but making lookups faster). +`blenderType`:: +Used to calculate weight coefficient using the position of the first matching word. Can be one of: +`position_linear`::: +`weightFieldValue*(1 - 0.10*position)`: Matches to the start will be given a higher score. This is the default. +`position_reciprocal`::: +`weightFieldValue/(1+position)`: Matches to the end will be given a higher score. 
+`exponent`:::: +An optional configuration variable for the `position_reciprocal` blenderType used to control how fast the score will increase or decrease. Default `2.0`. -This implementation supports <> . +`numFactor`:: +The factor to multiply the number of searched elements from which results will be pruned. Default is `10`. -[[Suggester-FreeTextLookupFactory]] -===== FreeTextLookupFactory +`indexPath`:: +When using `BlendedInfixSuggester` you can provide your own path where the index will get built. The default directory name is `blendedInfixSuggesterIndexDir` and will be created in your collection's `data/` directory. + +`minPrefixChars`:: +Minimum number of leading characters before PrefixQuery is used (the default is `4`). Prefixes shorter than this are indexed as character ngrams (increasing index size but making lookups faster). + +This implementation supports <>. + +==== FreeTextLookupFactory It looks at the last tokens plus the prefix of whatever final token the user is typing, if present, to predict the most likely next token. The number of previous tokens that need to be considered can also be specified. This suggester would only be used as a fallback, when the primary suggester fails to find any suggestions. This implementation uses the following additional properties: -* suggestFreeTextAnalyzerFieldType: The analyzer used at "query-time" and "build-time" to analyze suggestions. This field is required. -* ngrams: The max number of tokens out of which singles will be make the dictionary. The default value is 2. Increasing this would mean you want more than the previous 2 tokens to be taken into consideration when making the suggestions. +`suggestFreeTextAnalyzerFieldType`:: +The analyzer used at "query-time" and "build-time" to analyze suggestions. This parameter is required. -[[Suggester-FSTLookupFactory]] -===== FSTLookupFactory +`ngrams`:: +The maximum number of tokens used to build dictionary entries (i.e., the n-gram size). The default value is `2`. Increasing this would mean you want more than the previous 2 tokens to be taken into consideration when making the suggestions. + +==== FSTLookupFactory An automaton-based lookup. This implementation is slower to build, but provides the lowest memory cost. We recommend using this implementation unless you need more sophisticated matching results, in which case you should use the Jaspell implementation. This implementation uses the following additional properties: -* exactMatchFirst: If true, the default, exact suggestions are returned first, even if they are prefixes or other strings in the FST have larger weights. -* weightBuckets: The number of separate buckets for weights which the suggester will use while building its dictionary. +`exactMatchFirst`:: +If `true`, the default, exact suggestions are returned first, even if they are prefixes or other strings in the FST have larger weights. -[[Suggester-TSTLookupFactory]] -===== TSTLookupFactory +`weightBuckets`:: +The number of separate buckets for weights which the suggester will use while building its dictionary. + +==== TSTLookupFactory A simple compact ternary trie based lookup. -[[Suggester-WFSTLookupFactory]] -===== WFSTLookupFactory +==== WFSTLookupFactory -A weighted automaton representation which is an alternative to FSTLookup for more fine-grained ranking. WFSTLookup does not use buckets, but instead a shortest path algorithm. Note that it expects weights to be whole numbers. If weight is missing it's assumed to be 1.0.
Weights affect the sorting of matching suggestions when `spellcheck.onlyMorePopular=true` is selected: weights are treated as "popularity" score, with higher weights preferred over suggestions with lower weights. +A weighted automaton representation which is an alternative to `FSTLookup` for more fine-grained ranking. `WFSTLookup` does not use buckets, but instead a shortest path algorithm. -[[Suggester-JaspellLookupFactory]] -===== JaspellLookupFactory +Note that it expects weights to be whole numbers. If weight is missing it's assumed to be `1.0`. Weights affect the sorting of matching suggestions when `spellcheck.onlyMorePopular=true` is selected: weights are treated as "popularity" score, with higher weights preferred over suggestions with lower weights. + +==== JaspellLookupFactory A more complex lookup based on a ternary trie from the http://jaspell.sourceforge.net/[JaSpell] project. Use this implementation if you need more sophisticated matching results. -[[Suggester-DictionaryImplementations]] -==== Dictionary Implementations +=== Dictionary Implementations The dictionary implementations define how terms are stored. There are several options, and multiple dictionaries can be used in a single request if necessary. -[[Suggester-DocumentDictionaryFactory]] -===== DocumentDictionaryFactory +==== DocumentDictionaryFactory A dictionary with terms, weights, and an optional payload taken from the index. This dictionary implementation takes the following parameters in addition to parameters described for the Suggester generally and for the lookup implementation: -* weightField: A field that is stored or a numeric DocValue field. This field is optional. -* payloadField: The payloadField should be a field that is stored. This field is optional. -* contextField: Field to be used for context filtering. Note that only some lookup implementations support filtering. +`weightField`:: +A field that is stored or a numeric DocValue field. This parameter is optional. -[[Suggester-DocumentExpressionDictionaryFactory]] -===== DocumentExpressionDictionaryFactory +`payloadField`:: +The `payloadField` should be a field that is stored. This parameter is optional. -This dictionary implementation is the same as the DocumentDictionaryFactory but allows users to specify an arbitrary expression into the 'weightExpression' tag. +`contextField`:: +Field to be used for context filtering. Note that only some lookup implementations support filtering. + +==== DocumentExpressionDictionaryFactory + +This dictionary implementation is the same as the `DocumentDictionaryFactory` but allows users to specify an arbitrary expression into the `weightExpression` tag. This dictionary implementation takes the following parameters in addition to parameters described for the Suggester generally and for the lookup implementation: -* payloadField: The payloadField should be a field that is stored. This field is optional. -* weightExpression: An arbitrary expression used for scoring the suggestions. The fields used must be numeric fields. This field is required. -* contextField: Field to be used for context filtering. Note that only some lookup implementations support filtering. +`payloadField`:: +The `payloadField` should be a field that is stored. This parameter is optional. -[[Suggester-HighFrequencyDictionaryFactory]] -===== HighFrequencyDictionaryFactory +`weightExpression`:: +An arbitrary expression used for scoring the suggestions. The fields used must be numeric fields. This parameter is required. 
+ +`contextField`:: +Field to be used for context filtering. Note that only some lookup implementations support filtering. + +==== HighFrequencyDictionaryFactory This dictionary implementation allows adding a threshold to prune out less frequent terms in cases where very common terms may overwhelm other terms. This dictionary implementation takes one parameter in addition to parameters described for the Suggester generally and for the lookup implementation: -* threshold: A value between zero and one representing the minimum fraction of the total documents where a term should appear in order to be added to the lookup dictionary. +`threshold`:: +A value between zero and one representing the minimum fraction of the total documents where a term should appear in order to be added to the lookup dictionary. -[[Suggester-FileDictionaryFactory]] -===== FileDictionaryFactory +==== FileDictionaryFactory This dictionary implementation allows using an external file that contains suggest entries. Weights and payloads can also be used. @@ -243,8 +304,9 @@ If using a dictionary file, it should be a plain text file in UTF-8 encoding. Yo This dictionary implementation takes one parameter in addition to parameters described for the Suggester generally and for the lookup implementation: -fieldDelimiter:: Specify the delimiter to be used separating the entries, weights and payloads. The default is tab ('\t'). - +`fieldDelimiter`:: +Specifies the delimiter to be used separating the entries, weights and payloads. The default is tab (`\t`). ++ .Example File [source,text] ---- @@ -253,8 +315,7 @@ accidentally 2.0 accommodate 3.0 ---- -[[Suggester-MultipleDictionaries]] -==== Multiple Dictionaries +=== Multiple Dictionaries It is possible to include multiple `dictionaryImpl` definitions in a single SuggestComponent definition. @@ -285,10 +346,9 @@ To do this, simply define separate suggesters, as in this example: ---- -When using these Suggesters in a query, you would define multiple 'suggest.dictionary' parameters in the request, referring to the names given for each Suggester in the search component definition. The response will include the terms in sections for each Suggester. See the <> section below for an example request and response. +When using these Suggesters in a query, you would define multiple `suggest.dictionary` parameters in the request, referring to the names given for each Suggester in the search component definition. The response will include the terms in sections for each Suggester. See the <> section below for an example request and response. -[[Suggester-AddingtheSuggestRequestHandler]] -=== Adding the Suggest Request Handler +== Adding the Suggest Request Handler After adding the search component, a request handler must be added to `solrconfig.xml`. This request handler works the <>, and allows you to configure default parameters for serving suggestion requests. The request handler definition must incorporate the "suggest" search component defined previously. 
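+A minimal sketch of such a handler, wiring in the "suggest" search component and setting a few of the defaults described in the next section (the dictionary name and count shown are illustrative), might look like this:
+
+[source,xml]
+----
+<requestHandler name="/suggest" class="solr.SearchHandler">
+  <lst name="defaults">
+    <str name="suggest">true</str>
+    <str name="suggest.count">10</str>
+    <str name="suggest.dictionary">mySuggester</str>
+  </lst>
+  <arr name="components">
+    <str>suggest</str>
+  </arr>
+</requestHandler>
+----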
@@ -305,42 +365,50 @@ After adding the search component, a request handler must be added to `solrconfi ---- -[[Suggester-SuggestRequestHandlerParameters]] -==== Suggest Request Handler Parameters +=== Suggest Request Handler Parameters The following parameters allow you to set defaults for the Suggest request handler: -// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed +`suggest=true`:: +This parameter should always be `true`, because we always want to run the Suggester for queries submitted to this handler. -[cols="30,70",options="header"] -|=== -|Parameter |Description -|suggest=true |This parameter should always be true, because we always want to run the Suggester for queries submitted to this handler. -|suggest.dictionary |The name of the dictionary component configured in the search component. This is a mandatory parameter. It can be set in the request handler, or sent as a parameter at query time. -|suggest.q |The query to use for suggestion lookups. -|suggest.count |Specifies the number of suggestions for Solr to return. -|suggest.cfq |A Context Filter Query used to filter suggestions based on the context field, if supported by the suggester. -|suggest.build |If true, it will build the suggester index. This is likely useful only for initial requests; you would probably not want to build the dictionary on every request, particularly in a production system. If you would like to keep your dictionary up to date, you should use the `buildOnCommit` or `buildOnOptimize` parameter for the search component. -|suggest.reload |If true, it will reload the suggester index. -|suggest.buildAll |If true, it will build all suggester indexes. -|suggest.reloadAll |If true, it will reload all suggester indexes. -|=== +`suggest.dictionary`:: +The name of the dictionary component configured in the search component. This is a mandatory parameter. It can be set in the request handler, or sent as a parameter at query time. + +`suggest.q`:: +The query to use for suggestion lookups. + +`suggest.count`:: +Specifies the number of suggestions for Solr to return. + +`suggest.cfq`:: +A Context Filter Query used to filter suggestions based on the context field, if supported by the suggester. + +`suggest.build`:: +If `true`, it will build the suggester index. This is likely useful only for initial requests; you would probably not want to build the dictionary on every request, particularly in a production system. If you would like to keep your dictionary up to date, you should use the `buildOnCommit` or `buildOnOptimize` parameter for the search component. + +`suggest.reload`:: +If `true`, it will reload the suggester index. + +`suggest.buildAll`:: +If `true`, it will build all suggester indexes. + +`suggest.reloadAll`:: +If `true`, it will reload all suggester indexes. These properties can also be overridden at query time, or not set in the request handler at all and always sent at query time. .Context Filtering [IMPORTANT] ==== -Context filtering (`suggest.cfq`) is currently only supported by AnalyzingInfixLookupFactory and BlendedInfixLookupFactory, and only when backed by a Document*Dictionary. All other implementations will return unfiltered matches as if filtering was not requested. +Context filtering (`suggest.cfq`) is currently only supported by `AnalyzingInfixLookupFactory` and `BlendedInfixLookupFactory`, and only when backed by a `Document*Dictionary`. 
All other implementations will return unfiltered matches as if filtering was not requested. ==== -[[Suggester-ExampleUsages]] == Example Usages -[[Suggester-GetSuggestionswithWeights]] === Get Suggestions with Weights -This is the basic suggestion using a single dictionary and a single Solr core. +This is a basic suggestion using a single dictionary and a single Solr core. Example query: @@ -349,7 +417,7 @@ Example query: http://localhost:8983/solr/techproducts/suggest?suggest=true&suggest.build=true&suggest.dictionary=mySuggester&wt=json&suggest.q=elec ---- -In this example, we've simply requested the string 'elec' with the suggest.q parameter and requested that the suggestion dictionary be built with suggest.build (note, however, that you would likely not want to build the index on every query - instead you should use buildOnCommit or buildOnOptimize if you have regularly changing documents). +In this example, we've simply requested the string 'elec' with the `suggest.q` parameter and requested that the suggestion dictionary be built with `suggest.build` (note, however, that you would likely not want to build the index on every query - instead you should use `buildOnCommit` or `buildOnOptimize` if you have regularly changing documents). Example response: @@ -388,8 +456,7 @@ Example response: } ---- -[[Suggester-MultipleDictionaries.1]] -=== Multiple Dictionaries +=== Using Multiple Dictionaries If you have defined multiple dictionaries, you can use them in queries. @@ -401,7 +468,7 @@ http://localhost:8983/solr/techproducts/suggest?suggest=true& \ suggest.dictionary=mySuggester&suggest.dictionary=altSuggester&wt=json&suggest.q=elec ---- -In this example we have sent the string 'elec' as the suggest.q parameter and named two suggest.dictionary definitions to be used. +In this example we have sent the string 'elec' as the `suggest.q` parameter and named two `suggest.dictionary` definitions to be used. Example response: @@ -441,10 +508,9 @@ Example response: } ---- -[[Suggester-ContextFiltering]] === Context Filtering -Context filtering lets you filter suggestions by a separate context field, such as category, department or any other token. The AnalyzingInfixLookupFactory and BlendedInfixLookupFactory currently support this feature, when backed by DocumentDictionaryFactory. +Context filtering lets you filter suggestions by a separate context field, such as category, department or any other token. The `AnalyzingInfixLookupFactory` and `BlendedInfixLookupFactory` currently support this feature, when backed by `DocumentDictionaryFactory`. Add `contextField` to your suggester configuration. This example will suggest names and allow to filter by category: @@ -469,8 +535,7 @@ Example context filtering suggest query: [source,text] ---- -http://localhost:8983/solr/techproducts/suggest?suggest=true&suggest.build=true& \ - suggest.dictionary=mySuggester&wt=json&suggest.q=c&suggest.cfq=memory +http://localhost:8983/solr/techproducts/suggest?suggest=true&suggest.build=true&suggest.dictionary=mySuggester&wt=json&suggest.q=c&suggest.cfq=memory ---- -The suggester will only bring back suggestions for products tagged with cat=memory. +The suggester will only bring back suggestions for products tagged with 'cat=memory'. 
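+Context filtering only has an effect if the indexed documents carry values in the configured `contextField`. For illustration (the id and name values here are made up), a document tagged with the 'memory' category could be indexed like this:
+
+[source,xml]
+----
+<add>
+  <doc>
+    <field name="id">EXAMPLE-1</field>
+    <field name="name">Example DDR3 Memory Module</field>
+    <field name="cat">memory</field>
+  </doc>
+</add>
+----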
diff --git a/solr/solr-ref-guide/src/taking-solr-to-production.adoc b/solr/solr-ref-guide/src/taking-solr-to-production.adoc index 97634101529..fe7ed082ed8 100644 --- a/solr/solr-ref-guide/src/taking-solr-to-production.adoc +++ b/solr/solr-ref-guide/src/taking-solr-to-production.adoc @@ -20,17 +20,14 @@ This section provides guidance on how to setup Solr to run in production on *nix platforms, such as Ubuntu. Specifically, we’ll walk through the process of setting up to run a single Solr instance on a Linux host and then provide tips on how to support multiple Solr nodes running on the same host. -[[TakingSolrtoProduction-ServiceInstallationScript]] == Service Installation Script Solr includes a service installation script (`bin/install_solr_service.sh`) to help you install Solr as a service on Linux. Currently, the script only supports CentOS, Debian, Red Hat, SUSE and Ubuntu Linux distributions. Before running the script, you need to determine a few parameters about your setup. Specifically, you need to decide where to install Solr and which system user should be the owner of the Solr files and process. -[[TakingSolrtoProduction-Planningyourdirectorystructure]] === Planning Your Directory Structure We recommend separating your live Solr files, such as logs and index files, from the files included in the Solr distribution bundle, as that makes it easier to upgrade Solr and is considered a good practice to follow as a system administrator. -[[TakingSolrtoProduction-SolrInstallationDirectory]] ==== Solr Installation Directory By default, the service installation script will extract the distribution archive into `/opt`. You can change this location using the `-i` option when running the installation script. The script will also create a symbolic link to the versioned directory of Solr. For instance, if you run the installation script for Solr {solr-docs-version}.0, then the following directory structure will be used: @@ -43,19 +40,16 @@ By default, the service installation script will extract the distribution archiv Using a symbolic link insulates any scripts from being dependent on the specific Solr version. If, down the road, you need to upgrade to a later version of Solr, you can just update the symbolic link to point to the upgraded version of Solr. We’ll use `/opt/solr` to refer to the Solr installation directory in the remaining sections of this page. -[[TakingSolrtoProduction-SeparateDirectoryforWritableFiles]] ==== Separate Directory for Writable Files You should also separate writable Solr files into a different directory; by default, the installation script uses `/var/solr`, but you can override this location using the `-d` option. With this approach, the files in `/opt/solr` will remain untouched and all files that change while Solr is running will live under `/var/solr`. -[[TakingSolrtoProduction-CreatetheSolruser]] === Create the Solr User Running Solr as `root` is not recommended for security reasons, and the <> start command will refuse to do so. Consequently, you should determine the username of a system user that will own all of the Solr files and the running Solr process. By default, the installation script will create the *solr* user, but you can override this setting using the -u option. If your organization has specific requirements for creating new user accounts, then you should create the user before running the script. The installation script will make the Solr user the owner of the `/opt/solr` and `/var/solr` directories. 
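+If you do need to create the account yourself before running the installation script, a command along these lines can serve as a starting point (exact flags and account policies vary by distribution and organization, so treat this only as a sketch):
+
+[source,bash]
+----
+# create a dedicated, non-root user named "solr" with a home directory and bash shell
+sudo useradd -m -s /bin/bash solr
+----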
You are now ready to run the installation script. -[[TakingSolrtoProduction-RuntheSolrInstallationScript]] === Run the Solr Installation Script To run the script, you'll need to download the latest Solr distribution archive and then do the following: @@ -97,12 +91,10 @@ If you do not want to start the service immediately, pass the `-n` option. You c We'll cover some additional configuration settings you can make to fine-tune your Solr setup in a moment. Before moving on, let's take a closer look at the steps performed by the installation script. This gives you a better overview and will help you understand important details about your Solr installation when reading other pages in this guide; such as when a page refers to Solr home, you'll know exactly where that is on your system. -[[TakingSolrtoProduction-SolrHomeDirectory]] ==== Solr Home Directory The Solr home directory (not to be confused with the Solr installation directory) is where Solr manages core directories with index files. By default, the installation script uses `/var/solr/data`. If the `-d` option is used on the install script, then this will change to the `data` subdirectory in the location given to the -d option. Take a moment to inspect the contents of the Solr home directory on your system. If you do not <>, the home directory must contain a `solr.xml` file. When Solr starts up, the Solr Control Script passes the location of the home directory using the `-Dsolr.solr.home=...` system property. -[[TakingSolrtoProduction-Environmentoverridesincludefile]] ==== Environment Overrides Include File The service installation script creates an environment specific include file that overrides defaults used by the `bin/solr` script. The main advantage of using an include file is that it provides a single location where all of your environment-specific overrides are defined. Take a moment to inspect the contents of the `/etc/default/solr.in.sh` file, which is the default path setup by the installation script. If you used the `-s` option on the install script to change the name of the service, then the first part of the filename will be different. For a service named `solr-demo`, the file will be named `/etc/default/solr-demo.in.sh`. There are many settings that you can override using this file. However, at a minimum, this script needs to define the `SOLR_PID_DIR` and `SOLR_HOME` variables, such as: @@ -115,7 +107,6 @@ SOLR_HOME=/var/solr/data The `SOLR_PID_DIR` variable sets the directory where the <> will write out a file containing the Solr server’s process ID. -[[TakingSolrtoProduction-Logsettings]] ==== Log Settings Solr uses Apache Log4J for logging. The installation script copies `/opt/solr/server/resources/log4j.properties` to `/var/solr/log4j.properties`. Take a moment to verify that the Solr include file is configured to send logs to the correct location by checking the following settings in `/etc/default/solr.in.sh`: @@ -128,7 +119,6 @@ SOLR_LOGS_DIR=/var/solr/logs For more information about Log4J configuration, please see: <> -[[TakingSolrtoProduction-init.dscript]] ==== init.d Script When running a service like Solr on Linux, it’s common to setup an init.d script so that system administrators can control Solr using the service tool, such as: `service solr start`. The installation script creates a very basic init.d script to help you get started. Take a moment to inspect the `/etc/init.d/solr` file, which is the default script name setup by the installation script. 
If you used the `-s` option on the install script to change the name of the service, then the filename will be different. Notice that the following variables are setup for your environment based on the parameters passed to the installation script: @@ -149,7 +139,6 @@ service solr start The `/etc/init.d/solr` script also supports the **stop**, **restart**, and *status* commands. Please keep in mind that the init script that ships with Solr is very basic and is intended to show you how to setup Solr as a service. However, it’s also common to use more advanced tools like *supervisord* or *upstart* to control Solr as a service on Linux. While showing how to integrate Solr with tools like supervisord is beyond the scope of this guide, the `init.d/solr` script should provide enough guidance to help you get started. Also, the installation script sets the Solr service to start automatically when the host machine initializes. -[[TakingSolrtoProduction-ProgressCheck]] === Progress Check In the next section, we cover some additional environment settings to help you fine-tune your production setup. However, before we move on, let's review what we've achieved thus far. Specifically, you should be able to control Solr using `/etc/init.d/solr`. Please verify the following commands work with your setup: @@ -174,10 +163,8 @@ Solr process PID running on port 8983 If the `status` command is not successful, look for error messages in `/var/solr/logs/solr.log`. -[[TakingSolrtoProduction-Finetuneyourproductionsetup]] == Fine-Tune Your Production Setup -[[TakingSolrtoProduction-MemoryandGCSettings]] === Memory and GC Settings By default, the `bin/solr` script sets the maximum Java heap size to 512M (-Xmx512m), which is fine for getting started with Solr. For production, you’ll want to increase the maximum heap size based on the memory requirements of your search application; values between 10 and 20 gigabytes are not uncommon for production servers. When you need to change the memory settings for your Solr server, use the `SOLR_JAVA_MEM` variable in the include file, such as: @@ -189,13 +176,11 @@ SOLR_JAVA_MEM="-Xms10g -Xmx10g" Also, the <> comes with a set of pre-configured Java Garbage Collection settings that have shown to work well with Solr for a number of different workloads. However, these settings may not work well for your specific use of Solr. Consequently, you may need to change the GC settings, which should also be done with the `GC_TUNE` variable in the `/etc/default/solr.in.sh` include file. For more information about tuning your memory and garbage collection settings, see: <>. -[[TakingSolrtoProduction-Out-of-MemoryShutdownHook]] ==== Out-of-Memory Shutdown Hook The `bin/solr` script registers the `bin/oom_solr.sh` script to be called by the JVM if an OutOfMemoryError occurs. The `oom_solr.sh` script will issue a `kill -9` to the Solr process that experiences the `OutOfMemoryError`. This behavior is recommended when running in SolrCloud mode so that ZooKeeper is immediately notified that a node has experienced a non-recoverable error. Take a moment to inspect the contents of the `/opt/solr/bin/oom_solr.sh` script so that you are familiar with the actions the script will perform if it is invoked by the JVM. -[[TakingSolrtoProduction-SolrCloud]] -=== SolrCloud +=== Going to Production with SolrCloud To run Solr in SolrCloud mode, you need to set the `ZK_HOST` variable in the include file to point to your ZooKeeper ensemble. Running the embedded ZooKeeper is not supported in production environments. 
For instance, if you have a ZooKeeper ensemble hosted on the following three hosts on the default client port 2181 (zk1, zk2, and zk3), then you would set: @@ -206,7 +191,6 @@ ZK_HOST=zk1,zk2,zk3 When the `ZK_HOST` variable is set, Solr will launch in "cloud" mode. -[[TakingSolrtoProduction-ZooKeeperchroot]] ==== ZooKeeper chroot If you're using a ZooKeeper instance that is shared by other systems, it's recommended to isolate the SolrCloud znode tree using ZooKeeper's chroot support. For instance, to ensure all znodes created by SolrCloud are stored under `/solr`, you can put `/solr` on the end of your `ZK_HOST` connection string, such as: @@ -225,12 +209,9 @@ bin/solr zk mkroot /solr -z : [NOTE] ==== - If you also want to bootstrap ZooKeeper with existing `solr_home`, you can instead use the `zkcli.sh` / `zkcli.bat` `bootstrap` command, which will also create the chroot path if it does not exist. See <> for more info. - ==== -[[TakingSolrtoProduction-SolrHostname]] === Solr Hostname Use the `SOLR_HOST` variable in the include file to set the hostname of the Solr server. @@ -242,7 +223,6 @@ SOLR_HOST=solr1.example.com Setting the hostname of the Solr server is recommended, especially when running in SolrCloud mode, as this determines the address of the node when it registers with ZooKeeper. -[[TakingSolrtoProduction-Overridesettingsinsolrconfig.xml]] === Override Settings in solrconfig.xml Solr allows configuration properties to be overridden using Java system properties passed at startup using the `-Dproperty=value` syntax. For instance, in `solrconfig.xml`, the default auto soft commit settings are set to: @@ -268,7 +248,6 @@ The `bin/solr` script simply passes options starting with `-D` on to the JVM dur SOLR_OPTS="$SOLR_OPTS -Dsolr.autoSoftCommit.maxTime=10000" ---- -[[TakingSolrtoProduction-RunningmultipleSolrnodesperhost]] == Running Multiple Solr Nodes Per Host The `bin/solr` script is capable of running multiple instances on one machine, but for a *typical* installation, this is not a recommended setup. Extra CPU and memory resources are required for each additional instance. A single instance is easily capable of handling multiple indexes. diff --git a/solr/solr-ref-guide/src/the-query-elevation-component.adoc b/solr/solr-ref-guide/src/the-query-elevation-component.adoc index dcd3c7e190f..638aa8163f0 100644 --- a/solr/solr-ref-guide/src/the-query-elevation-component.adoc +++ b/solr/solr-ref-guide/src/the-query-elevation-component.adoc @@ -31,7 +31,6 @@ All of the sample configuration and queries used in this section assume you are bin/solr -e techproducts ---- -[[TheQueryElevationComponent-ConfiguringtheQueryElevationComponent]] == Configuring the Query Elevation Component You can configure the Query Elevation Component in the `solrconfig.xml` file. Search components like `QueryElevationComponent` may be added to any request handler; a dedicated request handler is used here for brevity. @@ -72,7 +71,6 @@ Path to the file that defines query elevation. 
This file must exist in `> can be used to annotate each document with information about whether or not it was elevated: @@ -132,7 +125,6 @@ Likewise, it can be helpful when troubleshooting to see all matching documents `\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&markExcludes=true&fl=id,[elevated],[excluded]` -[[TheQueryElevationComponent-TheelevateIdsandexcludeIdsParameters]] === The elevateIds and excludeIds Parameters When the elevation component is in use, the pre-configured list of elevations for a query can be overridden at request time to use the unique keys specified in these request parameters. @@ -147,7 +139,6 @@ For example, in the request below documents IW-02 and F8V7067-APL-KIT will be el `\http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&elevateIds=IW-02,F8V7067-APL-KIT` -[[TheQueryElevationComponent-ThefqParameter]] -=== The fq Parameter +=== The fq Parameter with Elevation Query elevation respects the standard filter query (`fq`) parameter. That is, if the query contains the `fq` parameter, all results will be within that filter even if `elevate.xml` adds other documents to the result set. diff --git a/solr/solr-ref-guide/src/the-stats-component.adoc b/solr/solr-ref-guide/src/the-stats-component.adoc index a5eb334a1bf..ada56a86e1a 100644 --- a/solr/solr-ref-guide/src/the-stats-component.adoc +++ b/solr/solr-ref-guide/src/the-stats-component.adoc @@ -27,7 +27,6 @@ The sample queries in this section assume you are running the "```techproducts`` bin/solr -e techproducts ---- -[[TheStatsComponent-StatsComponentParameters]] == Stats Component Parameters The Stats Component accepts the following parameters: @@ -41,8 +40,7 @@ Specifies a field for which statistics should be generated. This parameter may b <> may be used to indicate which subset of the supported statistics should be computed, and/or that statistics should be computed over the results of an arbitrary numeric function (or query) instead of a simple field name. See the examples below. -[[TheStatsComponent-Example]] -=== Example +=== Stats Component Example The query below demonstrates computing stats against two different fields numeric fields, as well as stats over the results of a `termfreq()` function call using the `text` field: @@ -89,10 +87,9 @@ The query below demonstrates computing stats against two different fields numeri ---- -[[TheStatsComponent-StatisticsSupported]] == Statistics Supported -The table below explains the statistics supported by the Stats component. Not all statistics are supported for all field types, and not all statistics are computed by default (see <> below for details) +The table below explains the statistics supported by the Stats component. Not all statistics are supported for all field types, and not all statistics are computed by default (see <> below for details) `min`:: The minimum value of the field/function in all documents in the set. This statistic is computed for all field types and is computed by default. @@ -134,14 +131,13 @@ Input for this option can be floating point number between `0.0` and `1.0` indic + This statistic is computed for all field types but is not computed by default. 
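As a quick way to see which of the statistics above are returned by default, the following request (a minimal sketch assuming the "techproducts" example, whose schema includes the numeric `price` and `popularity` fields) asks for stats on both fields without returning any documents:

[source,bash]
----
curl 'http://localhost:8983/solr/techproducts/select?q=*:*&rows=0&stats=true&stats.field=price&stats.field=popularity'
----

The `stats` section of the response then lists `min`, `max`, `count`, `missing`, `sum`, `sumOfSquares`, `mean`, and `stddev` for each field; statistics such as `percentiles` and `cardinality` only appear when explicitly requested via the local parameters described below.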
-[[TheStatsComponent-LocalParameters]] -== Local Parameters +== Local Parameters with the Stats Component Similar to the <>, the `stats.field` parameter supports local parameters for: * Tagging & Excluding Filters: `stats.field={!ex=filterA}price` * Changing the Output Key: `stats.field={!key=my_price_stats}price` -* Tagging stats for <>: `stats.field={!tag=my_pivot_stats}price` +* Tagging stats for <>: `stats.field={!tag=my_pivot_stats}price` Local parameters can also be used to specify individual statistics by name, overriding the set of statistics computed by default, eg: `stats.field={!min=true max=true percentiles='99,99.9,99.99'}price` @@ -159,8 +155,7 @@ Additional "Expert" local params are supported in some cases for affecting the b ** `hllLog2m` - an integer value specifying an explicit "log2m" value to use, overriding the heuristic value determined by the cardinality local param and the field type – see the https://github.com/aggregateknowledge/java-hll/[java-hll] documentation for more details ** `hllRegwidth` - an integer value specifying an explicit "regwidth" value to use, overriding the heuristic value determined by the cardinality local param and the field type – see the https://github.com/aggregateknowledge/java-hll/[java-hll] documentation for more details -[[TheStatsComponent-Examples]] -=== Examples +=== Examples with Local Parameters Here we compute some statistics for the price field. The min, max, mean, 90th, and 99th percentile price values are computed against all products that are in stock (`q=*:*` and `fq=inStock:true`), and independently all of the default statistics are computed against all products regardless of whether they are in stock or not (by excluding that filter). @@ -193,7 +188,6 @@ Here we compute some statistics for the price field. The min, max, mean, 90th, a ---- -[[TheStatsComponent-TheStatsComponentandFaceting]] == The Stats Component and Faceting Sets of `stats.field` parameters can be referenced by `'tag'` when using Pivot Faceting to compute multiple statistics at every level (i.e.: field) in the tree of pivot constraints. diff --git a/solr/solr-ref-guide/src/the-term-vector-component.adoc b/solr/solr-ref-guide/src/the-term-vector-component.adoc index 218d55393eb..fc679b731a8 100644 --- a/solr/solr-ref-guide/src/the-term-vector-component.adoc +++ b/solr/solr-ref-guide/src/the-term-vector-component.adoc @@ -22,8 +22,7 @@ The TermVectorComponent is a search component designed to return additional info For each document in the response, the TermVectorCcomponent can return the term vector, the term frequency, inverse document frequency, position, and offset information. -[[TheTermVectorComponent-Configuration]] -== Configuration +== Term Vector Component Configuration The TermVectorComponent is not enabled implicitly in Solr - it must be explicitly configured in your `solrconfig.xml` file. 
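For example, a request along these lines (a hedged sketch assuming the "techproducts" example fields `cat`, `inStock`, and `price`) tags a `stats.field` and then references that tag from `facet.pivot`, so that price statistics are computed for every pivot bucket:

[source,bash]
----
curl -g 'http://localhost:8983/solr/techproducts/select?q=*:*&rows=0&stats=true&stats.field={!tag=piv1}price&facet=true&facet.pivot={!stats=piv1}cat,inStock'
----

Each constraint in the pivot response then carries its own `stats` block for `price` (the `-g` flag simply stops curl from treating the `{!...}` local parameters as URL globbing patterns).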
The examples on this page show how it is configured in Solr's "```techproducts```" example: @@ -67,7 +66,6 @@ Once your handler is defined, you may use in conjunction with any schema (that h termOffsets="true" /> ---- -[[TheTermVectorComponent-InvokingtheTermVectorComponent]] == Invoking the Term Vector Component The example below shows an invocation of this component using the above configuration: @@ -124,8 +122,7 @@ The example below shows an invocation of this component using the above configur ---- -[[TheTermVectorComponent-RequestParameters]] -=== Request Parameters +=== Term Vector Request Parameters The example below shows some of the available request parameters for this component: @@ -168,7 +165,6 @@ To learn more about TermVector component output, see the Wiki page: http://wiki. For schema requirements, see also the section <>. -[[TheTermVectorComponent-SolrJandtheTermVectorComponent]] == SolrJ and the Term Vector Component Neither the `SolrQuery` class nor the `QueryResponse` class offer specific method calls to set Term Vector Component parameters or get the "termVectors" output. However, there is a patch for it: https://issues.apache.org/jira/browse/SOLR-949[SOLR-949]. diff --git a/solr/solr-ref-guide/src/the-terms-component.adoc b/solr/solr-ref-guide/src/the-terms-component.adoc index 69e1b07ba8e..c8b51ca8b7a 100644 --- a/solr/solr-ref-guide/src/the-terms-component.adoc +++ b/solr/solr-ref-guide/src/the-terms-component.adoc @@ -22,12 +22,10 @@ The Terms Component provides access to the indexed terms in a field and the numb In a sense, this search component provides fast field-faceting over the whole index, not restricted by the base query or any filters. The document frequencies returned are the number of documents that match the term, including any documents that have been marked for deletion but not yet removed from the index. -[[TheTermsComponent-ConfiguringtheTermsComponent]] == Configuring the Terms Component By default, the Terms Component is already configured in `solrconfig.xml` for each collection. -[[TheTermsComponent-DefiningtheTermsComponent]] === Defining the Terms Component Defining the Terms search component is straightforward: simply give it a name and use the class `solr.TermsComponent`. @@ -39,7 +37,6 @@ Defining the Terms search component is straightforward: simply give it a name an This makes the component available for use, but by itself will not be useable until included with a request handler. -[[TheTermsComponent-UsingtheTermsComponentinaRequestHandler]] === Using the Terms Component in a Request Handler The terms component is included with the `/terms` request handler, which is among Solr's out-of-the-box request handlers - see <>. @@ -48,7 +45,6 @@ Note that the defaults for this request handler set the parameter "terms" to tru You could add this component to another handler if you wanted to, and pass "terms=true" in the HTTP request in order to get terms back. If it is only defined in a separate handler, you must use that handler when querying in order to get terms and not regular documents as results. -[[TheTermsComponent-TermsComponentParameters]] === Terms Component Parameters The parameters below allow you to control what terms are returned. You can also configure any of these with the request handler if you'd like to set them permanently. Or, you can add them to the query request. 
These parameters are: @@ -159,12 +155,10 @@ The response to a terms request is a list of the terms and their document freque You may also be interested in the {solr-javadocs}/solr-core/org/apache/solr/handler/component/TermsComponent.html[TermsComponent javadoc]. -[[TheTermsComponent-Examples]] -== Examples +== Terms Component Examples All of the following sample queries work with Solr's "`bin/solr -e techproducts`" example. -[[TheTermsComponent-GetTop10Terms]] === Get Top 10 Terms This query requests the first ten terms in the name field: `\http://localhost:8983/solr/techproducts/terms?terms.fl=name` @@ -195,8 +189,6 @@ Results: ---- - -[[TheTermsComponent-GetFirst10TermsStartingwithLetter_a_]] === Get First 10 Terms Starting with Letter 'a' This query requests the first ten terms in the name field, in index order (instead of the top 10 results by document count): `\http://localhost:8983/solr/techproducts/terms?terms.fl=name&terms.lower=a&terms.sort=index` @@ -227,7 +219,6 @@ Results: ---- -[[TheTermsComponent-SolrJinvocation]] === SolrJ Invocation [source,java] @@ -245,7 +236,6 @@ Results: List terms = request.process(getSolrClient()).getTermsResponse().getTerms("terms_s"); ---- -[[TheTermsComponent-UsingtheTermsComponentforanAuto-SuggestFeature]] == Using the Terms Component for an Auto-Suggest Feature If the <> doesn't suit your needs, you can use the Terms component in Solr to build a similar feature for your own search application. Simply submit a query specifying whatever characters the user has typed so far as a prefix. For example, if the user has typed "at", the search engine's interface would submit the following query: @@ -288,7 +278,6 @@ Result: } ---- -[[TheTermsComponent-DistributedSearchSupport]] == Distributed Search Support The TermsComponent also supports distributed indexes. For the `/terms` request handler, you must provide the following two parameters: diff --git a/solr/solr-ref-guide/src/the-well-configured-solr-instance.adoc b/solr/solr-ref-guide/src/the-well-configured-solr-instance.adoc index a6883ec5cab..29f829ec1bb 100644 --- a/solr/solr-ref-guide/src/the-well-configured-solr-instance.adoc +++ b/solr/solr-ref-guide/src/the-well-configured-solr-instance.adoc @@ -37,7 +37,5 @@ This section covers the following topics: [IMPORTANT] ==== - The focus of this section is generally on configuring a single Solr instance, but for those interested in scaling a Solr implementation in a cluster environment, see also the section <>. There are also options to scale through sharding or replication, described in the section <>. - ==== diff --git a/solr/solr-ref-guide/src/tokenizers.adoc b/solr/solr-ref-guide/src/tokenizers.adoc index 7a8bdeb37f4..7718723baed 100644 --- a/solr/solr-ref-guide/src/tokenizers.adoc +++ b/solr/solr-ref-guide/src/tokenizers.adoc @@ -49,7 +49,6 @@ The following sections describe the tokenizer factory classes included in this r For user tips about Solr's tokenizers, see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters. -[[Tokenizers-StandardTokenizer]] == Standard Tokenizer This tokenizer splits the text field into tokens, treating whitespace and punctuation as delimiters. 
Delimiter characters are discarded, with the following exceptions: @@ -80,7 +79,6 @@ The Standard Tokenizer supports http://unicode.org/reports/tr29/#Word_Boundaries *Out:* "Please", "email", "john.doe", "foo.com", "by", "03", "09", "re", "m37", "xq" -[[Tokenizers-ClassicTokenizer]] == Classic Tokenizer The Classic Tokenizer preserves the same behavior as the Standard Tokenizer of Solr versions 3.1 and previous. It does not use the http://unicode.org/reports/tr29/#Word_Boundaries[Unicode standard annex UAX#29] word boundary rules that the Standard Tokenizer uses. This tokenizer splits the text field into tokens, treating whitespace and punctuation as delimiters. Delimiter characters are discarded, with the following exceptions: @@ -110,7 +108,6 @@ The Classic Tokenizer preserves the same behavior as the Standard Tokenizer of S *Out:* "Please", "email", "john.doe@foo.com", "by", "03-09", "re", "m37-xq" -[[Tokenizers-KeywordTokenizer]] == Keyword Tokenizer This tokenizer treats the entire text field as a single token. @@ -132,7 +129,6 @@ This tokenizer treats the entire text field as a single token. *Out:* "Please, email john.doe@foo.com by 03-09, re: m37-xq." -[[Tokenizers-LetterTokenizer]] == Letter Tokenizer This tokenizer creates tokens from strings of contiguous letters, discarding all non-letter characters. @@ -154,7 +150,6 @@ This tokenizer creates tokens from strings of contiguous letters, discarding all *Out:* "I", "can", "t" -[[Tokenizers-LowerCaseTokenizer]] == Lower Case Tokenizer Tokenizes the input stream by delimiting at non-letters and then converting all letters to lowercase. Whitespace and non-letters are discarded. @@ -176,7 +171,6 @@ Tokenizes the input stream by delimiting at non-letters and then converting all *Out:* "i", "just", "love", "my", "iphone" -[[Tokenizers-N-GramTokenizer]] == N-Gram Tokenizer Reads the field text and generates n-gram tokens of sizes in the given range. @@ -219,7 +213,6 @@ With an n-gram size range of 4 to 5: *Out:* "bicy", "bicyc", "icyc", "icycl", "cycl", "cycle", "ycle" -[[Tokenizers-EdgeN-GramTokenizer]] == Edge N-Gram Tokenizer Reads the field text and generates edge n-gram tokens of sizes in the given range. @@ -279,7 +272,6 @@ Edge n-gram range of 2 to 5, from the back side: *Out:* "oo", "loo", "aloo", "baloo" -[[Tokenizers-ICUTokenizer]] == ICU Tokenizer This tokenizer processes multilingual text and tokenizes it appropriately based on its script attribute. @@ -319,7 +311,6 @@ To use this tokenizer, you must add additional .jars to Solr's classpath (as des ==== -[[Tokenizers-PathHierarchyTokenizer]] == Path Hierarchy Tokenizer This tokenizer creates synonyms from file path hierarchies. @@ -347,7 +338,6 @@ This tokenizer creates synonyms from file path hierarchies. *Out:* "c:", "c:/usr", "c:/usr/local", "c:/usr/local/apache" -[[Tokenizers-RegularExpressionPatternTokenizer]] == Regular Expression Pattern Tokenizer This tokenizer uses a Java regular expression to break the input text stream into tokens. The expression provided by the pattern argument can be interpreted either as a delimiter that separates tokens, or to match patterns that should be extracted from the text as tokens. 
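The *In*/*Out* pairs shown for each tokenizer can be reproduced against a running Solr instance with the field analysis request handler; the sketch below assumes the "techproducts" example and its `text_general` field type, and shows how each analysis stage, including the tokenizer, breaks up a sample value:

[source,bash]
----
curl 'http://localhost:8983/solr/techproducts/analysis/field?analysis.fieldtype=text_general&analysis.fieldvalue=Please,+email+john.doe@foo.com+by+03-09,+re:+m37-xq.&wt=json'
----

The Analysis screen in the Admin UI presents the same information in a friendlier form.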
@@ -407,7 +397,6 @@ Extract part numbers which are preceded by "SKU", "Part" or "Part Number", case *Out:* "1234", "5678", "126-987" -[[Tokenizers-SimplifiedRegularExpressionPatternTokenizer]] == Simplified Regular Expression Pattern Tokenizer This tokenizer is similar to the `PatternTokenizerFactory` described above, but uses Lucene {lucene-javadocs}/core/org/apache/lucene/util/automaton/RegExp.html[`RegExp`] pattern matching to construct distinct tokens for the input stream. The syntax is more limited than `PatternTokenizerFactory`, but the tokenization is quite a bit faster. @@ -431,7 +420,6 @@ To match tokens delimited by simple whitespace characters: ---- -[[Tokenizers-SimplifiedRegularExpressionPatternSplittingTokenizer]] == Simplified Regular Expression Pattern Splitting Tokenizer This tokenizer is similar to the `SimplePatternTokenizerFactory` described above, but uses Lucene {lucene-javadocs}/core/org/apache/lucene/util/automaton/RegExp.html[`RegExp`] pattern matching to identify sequences of characters that should be used to split tokens. The syntax is more limited than `PatternTokenizerFactory`, but the tokenization is quite a bit faster. @@ -455,7 +443,6 @@ To match tokens delimited by simple whitespace characters: ---- -[[Tokenizers-UAX29URLEmailTokenizer]] == UAX29 URL Email Tokenizer This tokenizer splits the text field into tokens, treating whitespace and punctuation as delimiters. Delimiter characters are discarded, with the following exceptions: @@ -491,7 +478,6 @@ The UAX29 URL Email Tokenizer supports http://unicode.org/reports/tr29/#Word_Bou *Out:* "Visit", "http://accarol.com/contact.htm?from=external&a=10", "or", "e", "mail", "bob.cratchet@accarol.com" -[[Tokenizers-WhiteSpaceTokenizer]] == White Space Tokenizer Simple tokenizer that splits the text stream on whitespace and returns sequences of non-whitespace characters as tokens. Note that any punctuation _will_ be included in the tokens. diff --git a/solr/solr-ref-guide/src/transforming-and-indexing-custom-json.adoc b/solr/solr-ref-guide/src/transforming-and-indexing-custom-json.adoc index a3ea40e52f1..30e32a7c72b 100644 --- a/solr/solr-ref-guide/src/transforming-and-indexing-custom-json.adoc +++ b/solr/solr-ref-guide/src/transforming-and-indexing-custom-json.adoc @@ -20,16 +20,29 @@ If you have JSON documents that you would like to index without transforming them into Solr's structure, you can add them to Solr by including some parameters with the update request. These parameters provide information on how to split a single JSON file into multiple Solr documents and how to map fields to Solr's schema. One or more valid JSON documents can be sent to the `/update/json/docs` path with the configuration params. -[[TransformingandIndexingCustomJSON-MappingParameters]] == Mapping Parameters These parameters allow you to define how a JSON file should be read for multiple Solr documents. -* **split**: Defines the path at which to split the input JSON into multiple Solr documents and is required if you have multiple documents in a single JSON file. If the entire JSON makes a single solr document, the path must be “`/`”. It is possible to pass multiple split paths by separating them with a pipe `(|)` example : `split=/|/foo|/foo/bar` . If one path is a child of another, they automatically become a child document **f**: This is a multivalued mapping parameter. The format of the parameter is` target-field-name:json-path`. The `json-path` is required. The `target-field-name` is the Solr document field name, and is optional. 
If not specified, it is automatically derived from the input JSON.The default target field name is the fully qualified name of the field. Wildcards can be used here, see the <> below for more information. -* *mapUniqueKeyOnly* (boolean): This parameter is particularly convenient when the fields in the input JSON are not available in the schema and <> is not enabled. This will index all the fields into the default search field (using the `df` parameter, below) and only the `uniqueKey` field is mapped to the corresponding field in the schema. If the input JSON does not have a value for the `uniqueKey` field then a UUID is generated for the same. -* **df**: If the `mapUniqueKeyOnly` flag is used, the update handler needs a field where the data should be indexed to. This is the same field that other handlers use as a default search field. -* **srcField**: This is the name of the field to which the JSON source will be stored into. This can only be used if `split=/` (i.e., you want your JSON input file to be indexed as a single Solr document). Note that atomic updates will cause the field to be out-of-sync with the document. -* **echo**: This is for debugging purpose only. Set it to true if you want the docs to be returned as a response. Nothing will be indexed. +split:: +Defines the path at which to split the input JSON into multiple Solr documents and is required if you have multiple documents in a single JSON file. If the entire JSON makes a single Solr document, the path must be “`/`”. It is possible to pass multiple split paths by separating them with a pipe (`|`), for example: `split=/|/foo|/foo/bar`. If one path is a child of another, they automatically become a child document. + +f:: +A multivalued mapping parameter. The format of the parameter is `target-field-name:json-path`. The `json-path` is required. The `target-field-name` is the Solr document field name, and is optional. If not specified, it is automatically derived from the input JSON. The default target field name is the fully qualified name of the field. ++ +Wildcards can be used here, see <> below for more information. + +mapUniqueKeyOnly:: +(boolean) This parameter is particularly convenient when the fields in the input JSON are not available in the schema and <> is not enabled. This will index all the fields into the default search field (using the `df` parameter, below) and only the `uniqueKey` field is mapped to the corresponding field in the schema. If the input JSON does not have a value for the `uniqueKey` field then a UUID is generated for it. + +df:: +If the `mapUniqueKeyOnly` flag is used, the update handler needs a field in which to index the data. This is the same field that other handlers use as a default search field. + +srcField:: +This is the name of the field into which the JSON source document will be stored. This can only be used if `split=/` (i.e., you want your JSON input file to be indexed as a single Solr document). Note that atomic updates will cause the field to be out-of-sync with the document. + +echo:: +This parameter is for debugging purposes only. Set it to `true` if you want the docs to be returned as a response. Nothing will be indexed. For example, if we have a JSON file that includes two documents, we could define an update request like this: @@ -152,15 +165,16 @@ In this example, we simply named the field paths (such as `/exams/test`). Solr w [TIP] ==== - -Documents WILL get rejected if the fields do not exist in the schema before indexing.
So, if you are NOT using schemaless mode, pre-create those fields. If you are working in <>, fields that don't exist will be created on the fly with Solr's best guess for the field type. - +Documents WILL get rejected if the fields do not exist in the schema before indexing. So, if you are NOT using schemaless mode, pre-create those fields. If you are working in <>, fields that don't exist will be created on the fly with Solr's best guess for the field type. ==== -[[TransformingandIndexingCustomJSON-Wildcards]] -== Wildcards +== Using Wildcards for Field Names -Instead of specifying all the field names explicitly, it is possible to specify wildcards to map fields automatically. There are two restrictions: wildcards can only be used at the end of the `json-path`, and the split path cannot use wildcards. A single asterisk `\*` maps only to direct children, and a double asterisk `\*\*` maps recursively to all descendants. The following are example wildcard path mappings: +Instead of specifying all the field names explicitly, it is possible to specify wildcards to map fields automatically. + +There are two restrictions: wildcards can only be used at the end of the `json-path`, and the split path cannot use wildcards. + +A single asterisk `\*` maps only to direct children, and a double asterisk `\*\*` maps recursively to all descendants. The following are example wildcard path mappings: * `f=$FQN:/**`: maps all fields to the fully qualified name (`$FQN`) of the JSON field. The fully qualified name is obtained by concatenating all the keys in the hierarchy with a period (`.`) as a delimiter. This is the default behavior if no `f` path mappings are specified. * `f=/docs/*`: maps all the fields under docs and in the name as given in json @@ -217,7 +231,7 @@ curl 'http://localhost:8983/solr/my_collection/update/json/docs'\ "test" : "term1", "marks" : 86} ] -}' +}' ---- In the above example, we've said all of the fields should be added to a field in Solr named 'txt'. This will add multiple fields to a single field, so whatever field you choose should be multi-valued. @@ -247,7 +261,7 @@ curl 'http://localhost:8983/solr/my_collection/update/json/docs?split=/exams'\ The indexed documents would be added to the index with fields that look like this: -[source,bash] +[source,json] ---- { "first":"John", @@ -265,8 +279,7 @@ The indexed documents would be added to the index with fields that look like thi "exams.marks":86} ---- -[[TransformingandIndexingCustomJSON-MultipledocumentsinaSinglePayload]] -== Multiple documents in a Single Payload +== Multiple Documents in a Single Payload This functionality supports documents in the http://jsonlines.org/[JSON Lines] format (`.jsonl`), which specifies one document per line. @@ -288,7 +301,6 @@ curl 'http://localhost:8983/solr/my_collection/update/json/docs' -H 'Content-typ { "first":"Steve", "last":"Woz", "grade":1, "subject": "Calculus", "test" : "term1", "marks" : 86}]' ---- -[[TransformingandIndexingCustomJSON-IndexingNestedDocuments]] == Indexing Nested Documents The following is an example of indexing nested documents: @@ -332,14 +344,12 @@ With this example, the documents indexed would be, as follows: "zip":95014}]} ---- -[[TransformingandIndexingCustomJSON-TipsforCustomJSONIndexing]] == Tips for Custom JSON Indexing -1. Schemaless mode: This handles field creation automatically. The field guessing may not be exactly as you expect, but it works. 
The best thing to do is to setup a local server in schemaless mode, index a few sample docs and create those fields in your real setup with proper field types before indexing -2. Pre-created Schema : Post your docs to the `/update/json/docs` endpoint with `echo=true`. This gives you the list of field names you need to create. Create the fields before you actually index -3. No schema, only full-text search : All you need to do is to do full-text search on your JSON. Set the configuration as given in the Setting JSON Defaults section. +. Schemaless mode: This handles field creation automatically. The field guessing may not be exactly as you expect, but it works. The best thing to do is to setup a local server in schemaless mode, index a few sample docs and create those fields in your real setup with proper field types before indexing +. Pre-created Schema: Post your docs to the `/update/json/docs` endpoint with `echo=true`. This gives you the list of field names you need to create. Create the fields before you actually index +. No schema, only full-text search : All you need to do is to do full-text search on your JSON. Set the configuration as given in the Setting JSON Defaults section. -[[TransformingandIndexingCustomJSON-SettingJSONDefaults]] == Setting JSON Defaults It is possible to send any json to the `/update/json/docs` endpoint and the default configuration of the component is as follows: diff --git a/solr/solr-ref-guide/src/transforming-result-documents.adoc b/solr/solr-ref-guide/src/transforming-result-documents.adoc index feb69318a79..754060db295 100644 --- a/solr/solr-ref-guide/src/transforming-result-documents.adoc +++ b/solr/solr-ref-guide/src/transforming-result-documents.adoc @@ -20,7 +20,6 @@ Document Transformers can be used to modify the information returned about each documents in the results of a query. -[[TransformingResultDocuments-UsingDocumentTransformers]] == Using Document Transformers When executing a request, a document transformer can be used by including it in the `fl` parameter using square brackets, for example: @@ -46,11 +45,9 @@ fl=id,name,score,my_val_a:[value v=42 t=int],my_val_b:[value v=7 t=float] The sections below discuss exactly what these various transformers do. -[[TransformingResultDocuments-AvailableTransformers]] == Available Transformers -[[TransformingResultDocuments-_value_-ValueAugmenterFactory]] === [value] - ValueAugmenterFactory Modifies every document to include the exact same value, as if it were a stored field in every document: @@ -94,7 +91,6 @@ In addition to using these request parameters, you can configure additional name The "```value```" option forces an explicit value to always be used, while the "```defaultValue```" option provides a default that can still be overridden using the "```v```" and "```t```" local parameters. -[[TransformingResultDocuments-_explain_-ExplainAugmenterFactory]] === [explain] - ExplainAugmenterFactory Augments each document with an inline explanation of its score exactly like the information available about each document in the debug section: @@ -128,18 +124,16 @@ A default style can be configured by specifying an "args" parameter in your conf ---- - -[[TransformingResultDocuments-_child_-ChildDocTransformerFactory]] === [child] - ChildDocTransformerFactory -This transformer returns all <> of each parent document matching your query in a flat list nested inside the matching parent document. 
This is useful when you have indexed nested child documents and want to retrieve the child documents for the relevant parent documents for any type of search query. +This transformer returns all <> of each parent document matching your query in a flat list nested inside the matching parent document. This is useful when you have indexed nested child documents and want to retrieve the child documents for the relevant parent documents for any type of search query. [source,plain] ---- fl=id,[child parentFilter=doc_type:book childFilter=doc_type:chapter limit=100] ---- -Note that this transformer can be used even though the query itself is not a <>. +Note that this transformer can be used even though the query itself is not a <>. When using this transformer, the `parentFilter` parameter must be specified, and works the same as in all Block Join Queries, additional optional parameters are: @@ -147,7 +141,6 @@ When using this transformer, the `parentFilter` parameter must be specified, and * `limit` - the maximum number of child documents to be returned per parent document (default: 10) -[[TransformingResultDocuments-_shard_-ShardAugmenterFactory]] === [shard] - ShardAugmenterFactory This transformer adds information about what shard each individual document came from in a distributed request. @@ -155,7 +148,6 @@ This transformer adds information about what shard each individual document came ShardAugmenterFactory does not support any request parameters, or configuration options. -[[TransformingResultDocuments-_docid_-DocIdAugmenterFactory]] === [docid] - DocIdAugmenterFactory This transformer adds the internal Lucene document id to each document – this is primarily only useful for debugging purposes. @@ -163,7 +155,6 @@ This transformer adds the internal Lucene document id to each document – this DocIdAugmenterFactory does not support any request parameters, or configuration options. -[[TransformingResultDocuments-_elevated_and_excluded_]] === [elevated] and [excluded] These transformers are available only when using the <>. @@ -195,7 +186,6 @@ fl=id,[elevated],[excluded]&excludeIds=GB18030TEST&elevateIds=6H500F0&markExclud ---- -[[TransformingResultDocuments-_json_xml_]] === [json] / [xml] These transformers replace field value containing a string representation of a valid XML or JSON structure with the actual raw XML or JSON structure rather than just the string value. Each applies only to the specific writer, such that `[json]` only applies to `wt=json` and `[xml]` only applies to `wt=xml`. @@ -206,7 +196,6 @@ fl=id,source_s:[json]&wt=json ---- -[[TransformingResultDocuments-_subquery_]] === [subquery] This transformer executes a separate query per transforming document passing document fields as an input for subquery parameters. It's usually used with `{!join}` and `{!parent}` query parsers, and is intended to be an improvement for `[child]`. @@ -261,8 +250,7 @@ Here is how it looks like in various formats: SolrDocumentList subResults = (SolrDocumentList)doc.getFieldValue("children"); ---- -[[TransformingResultDocuments-Subqueryresultfields]] -==== Subquery result fields +==== Subquery Result Fields To appear in subquery document list, a field should be specified both fl parameters, in main one fl (despite the main result documents have no this field) and in subquery's one eg `foo.fl`. Of course, you can use wildcard in any or both of these parameters. For example, if field title should appear in categories subquery, it can be done via one of these ways. 
@@ -274,14 +262,12 @@ fl=...*,categories:[subquery]&categories.fl=*&categories.q=... fl=...*,categories:[subquery]&categories.fl=*&categories.q=... ---- -[[TransformingResultDocuments-SubqueryParametersShift]] ==== Subquery Parameters Shift If subquery is declared as `fl=*,foo:[subquery]`, subquery parameters are prefixed with the given name and period. eg `q=*:*&fl=*,**foo**:[subquery]&**foo.**q=to be continued&**foo.**rows=10&**foo.**sort=id desc` -[[TransformingResultDocuments-DocumentFieldasanInputforSubqueryParameters]] ==== Document Field as an Input for Subquery Parameters It's necessary to pass some document field values as a parameter for subquery. It's supported via implicit *`row.__fieldname__`* parameter, and can be (but might not only) referred via Local Parameters syntax: `q=namne:john&fl=name,id,depts:[subquery]&depts.q={!terms f=id **v=$row.dept_id**}&depts.rows=10` @@ -292,7 +278,6 @@ Note, when document field has multiple values they are concatenated with comma b To log substituted subquery request parameters, add the corresponding parameter names, as in `depts.logParamsList=q,fl,rows,**row.dept_id**` -[[TransformingResultDocuments-CoresandCollectionsinSolrCloud]] ==== Cores and Collections in SolrCloud Use `foo:[subquery fromIndex=departments]` to invoke subquery on another core on the same node, it's what *`{!join}`* does for non-SolrCloud mode. But in case of SolrCloud just (and only) explicitly specify its' native parameters like `collection, shards` for subquery, eg: @@ -301,13 +286,10 @@ Use `foo:[subquery fromIndex=departments]` to invoke subquery on another core on [IMPORTANT] ==== - If subquery collection has a different unique key field name (let's say `foo_id` at contrast to `id` in primary collection), add the following parameters to accommodate this difference: `foo.fl=id:foo_id&foo.distrib.singlePass=true`. Otherwise you'll get `NullPoniterException` from `QueryComponent.mergeIds`. - ==== -[[TransformingResultDocuments-_geo_-Geospatialformatter]] === [geo] - Geospatial formatter Formats spatial data from a spatial field using a designated format type name. Two inner parameters are required: `f` for the field name, and `w` for the format name. Example: `geojson:[geo f=mySpatialField w=GeoJSON]`. @@ -317,7 +299,6 @@ Normally you'll simply be consistent in choosing the format type you want by set In addition, this feature is very useful with the `RptWithGeometrySpatialField` to avoid double-storage of the potentially large vector geometry. This transformer will detect that field type and fetch the geometry from an internal compact binary representation on disk (in docValues), and then format it as desired. As such, you needn't mark the field as stored, which would be redundant. In a sense this double-storage between docValues and stored-value storage isn't unique to spatial but with polygonal geometry it can be a lot of data, and furthermore you'd like to avoid storing it in a verbose format (like GeoJSON or WKT). -[[TransformingResultDocuments-_features_-LTRFeatureLoggerTransformerFactory]] === [features] - LTRFeatureLoggerTransformerFactory The "LTR" prefix stands for <>. This transformer returns the values of features and it can be used for feature extraction and feature logging. 
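For instance, a request of roughly this shape (a hedged sketch; it assumes the LTR contrib is enabled and that a model named `myModel`, a placeholder name here, has already been uploaded as described in the Learning To Rank section) reranks the top results with that model and logs the feature values used:

[source,bash]
----
curl -g 'http://localhost:8983/solr/techproducts/query?q=ipod&rq={!ltr+model=myModel+reRankDocs=100}&fl=id,score,[features]'
----

The `-g` flag keeps curl from interpreting the `[features]` transformer and the `{!ltr}` local parameters as URL globbing patterns.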
diff --git a/solr/solr-ref-guide/src/uima-integration.adoc b/solr/solr-ref-guide/src/uima-integration.adoc index c7f9725be10..92552052bda 100644 --- a/solr/solr-ref-guide/src/uima-integration.adoc +++ b/solr/solr-ref-guide/src/uima-integration.adoc @@ -20,7 +20,6 @@ You can integrate the Apache Unstructured Information Management Architecture (https://uima.apache.org/[UIMA]) with Solr. UIMA lets you define custom pipelines of Analysis Engines that incrementally add metadata to your documents as annotations. -[[UIMAIntegration-ConfiguringUIMA]] == Configuring UIMA The SolrUIMA UpdateRequestProcessor is a custom update request processor that takes documents being indexed, sends them to a UIMA pipeline, and then returns the documents enriched with the specified metadata. To configure UIMA for Solr, follow these steps: @@ -123,4 +122,3 @@ The SolrUIMA UpdateRequestProcessor is a custom update request processor that ta Once you are done with the configuration your documents will be automatically enriched with the specified fields when you index them. For more information about Solr UIMA integration, see https://wiki.apache.org/solr/SolrUIMA. - diff --git a/solr/solr-ref-guide/src/understanding-analyzers-tokenizers-and-filters.adoc b/solr/solr-ref-guide/src/understanding-analyzers-tokenizers-and-filters.adoc index 511a5e9935a..345634da898 100644 --- a/solr/solr-ref-guide/src/understanding-analyzers-tokenizers-and-filters.adoc +++ b/solr/solr-ref-guide/src/understanding-analyzers-tokenizers-and-filters.adoc @@ -25,16 +25,12 @@ The following sections describe how Solr breaks down and works with textual data * <> break field data into lexical units, or _tokens_. * <> examine a stream of tokens and keep them, transform or discard them, or create new ones. Tokenizers and filters may be combined to form pipelines, or _chains_, where the output of one is input to the next. Such a sequence of tokenizers and filters is called an _analyzer_ and the resulting output of an analyzer is used to match query results or build indices. - -[[UnderstandingAnalyzers_Tokenizers_andFilters-UsingAnalyzers_Tokenizers_andFilters]] == Using Analyzers, Tokenizers, and Filters Although the analysis process is used for both indexing and querying, the same analysis process need not be used for both operations. For indexing, you often want to simplify, or normalize, words. For example, setting all letters to lowercase, eliminating punctuation and accents, mapping words to their stems, and so on. Doing so can increase recall because, for example, "ram", "Ram" and "RAM" would all match a query for "ram". To increase query-time precision, a filter could be employed to narrow the matches by, for example, ignoring all-cap acronyms if you're interested in male sheep, but not Random Access Memory. The tokens output by the analysis process define the values, or _terms_, of that field and are used either to build an index of those terms when a new document is added, or to identify which documents contain the terms you are querying for. - -[[UnderstandingAnalyzers_Tokenizers_andFilters-ForMoreInformation]] === For More Information These sections will show you how to configure field analyzers and also serves as a reference for the details of configuring each of the available tokenizer and filter classes. It also serves as a guide so that you can configure your own analysis classes if you have special needs that cannot be met with the included filters or tokenizers. 
diff --git a/solr/solr-ref-guide/src/update-request-processors.adoc b/solr/solr-ref-guide/src/update-request-processors.adoc index 37cdebbb9bf..a11d74ad34a 100644 --- a/solr/solr-ref-guide/src/update-request-processors.adoc +++ b/solr/solr-ref-guide/src/update-request-processors.adoc @@ -22,8 +22,7 @@ Every update request received by Solr is run through a chain of plugins known as This can be useful, for example, to add a field to the document being indexed; to change the value of a particular field; or to drop an update if the incoming document doesn't fulfill certain criteria. In fact, a surprisingly large number of features in Solr are implemented as Update Processors and therefore it is necessary to understand how such plugins work and where are they configured. -[[UpdateRequestProcessors-AnatomyandLifecycle]] -== Anatomy and Lifecycle +== URP Anatomy and Lifecycle An Update Request Processor is created as part of a {solr-javadocs}/solr-core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[chain] of one or more update processors. Solr creates a default update request processor chain comprising of a few update request processors which enable essential Solr features. This default chain is used to process every update request unless a user chooses to configure and specify a different custom update request processor chain. @@ -38,14 +37,12 @@ When an update request is received by Solr, it looks up the update chain to be u NOTE: A single update request may contain a batch of multiple new documents or deletes and therefore the corresponding processXXX methods of an UpdateRequestProcessor will be invoked multiple times for every individual update. However, it is guaranteed that a single thread will serially invoke these methods. -[[UpdateRequestProcessors-Configuration]] -== Configuration +== Update Request Processor Configuration Update request processors chains can be created by either creating the whole chain directly in `solrconfig.xml` or by creating individual update processors in `solrconfig.xml` and then dynamically creating the chain at run-time by specifying all processors via request parameters. However, before we understand how to configure update processor chains, we must learn about the default update processor chain because it provides essential features which are needed in most custom request processor chains as well. -[[UpdateRequestProcessors-DefaultUpdateRequestProcessorChain]] === Default Update Request Processor Chain In case no update processor chains are configured in `solrconfig.xml`, Solr will automatically create a default update processor chain which will be used for all update requests. This default update processor chain consists of the following processors (in order): @@ -56,7 +53,6 @@ In case no update processor chains are configured in `solrconfig.xml`, Solr will Each of these perform an essential function and as such any custom chain usually contain all of these processors. The `RunUpdateProcessorFactory` is usually the last update processor in any custom chain. -[[UpdateRequestProcessors-CustomUpdateRequestProcessorChain]] === Custom Update Request Processor Chain The following example demonstrates how a custom chain can be configured inside `solrconfig.xml`. @@ -85,7 +81,6 @@ In the above example, a new update processor chain named "dedupe" is created wit Do not forget to add `RunUpdateProcessorFactory` at the end of any chains you define in `solrconfig.xml`. 
Otherwise update requests processed by that chain will not actually affect the indexed data. ==== -[[UpdateRequestProcessors-ConfiguringIndividualProcessorsasTop-LevelPlugins]] === Configuring Individual Processors as Top-Level Plugins Update request processors can also be configured independent of a chain in `solrconfig.xml`. @@ -113,7 +108,6 @@ In this case, an instance of `SignatureUpdateProcessorFactory` is configured wit ---- -[[UpdateRequestProcessors-UpdateProcessorsinSolrCloud]] == Update Processors in SolrCloud In a single node, stand-alone Solr, each update is run through all the update processors in a chain exactly once. But the behavior of update request processors in SolrCloud deserves special consideration. @@ -148,20 +142,18 @@ However executing a processor only on the forwarding nodes is a great way of dis .Custom update chain post-processors may never be invoked on a recovering replica [WARNING] ==== -While a replica is in <>, inbound update requests are buffered to the transaction log. After recovery has completed successfully, those buffered update requests are replayed. As of this writing, however, custom update chain post-processors are never invoked for buffered update requests. See https://issues.apache.org/jira/browse/SOLR-8030[SOLR-8030]. To work around this problem until SOLR-8030 has been fixed, *avoid specifying post-processors in custom update chains*. +While a replica is in <>, inbound update requests are buffered to the transaction log. After recovery has completed successfully, those buffered update requests are replayed. As of this writing, however, custom update chain post-processors are never invoked for buffered update requests. See https://issues.apache.org/jira/browse/SOLR-8030[SOLR-8030]. To work around this problem until SOLR-8030 has been fixed, *avoid specifying post-processors in custom update chains*. ==== -=== Atomic Updates +=== Atomic Update Processor Factory If the `AtomicUpdateProcessorFactory` is in the update chain before the `DistributedUpdateProcessor`, the incoming document to the chain will be a partial document. Because `DistributedUpdateProcessor` is responsible for processing <> into full documents on the leader node, this means that pre-processors which are executed only on the forwarding nodes can only operate on the partial document. If you have a processor which must process a full document then the only choice is to specify it as a post-processor. -[[UpdateRequestProcessors-UsingCustomChains]] == Using Custom Chains -[[UpdateRequestProcessors-update.chainRequestParameter]] === update.chain Request Parameter The `update.chain` parameter can be used in any update request to choose a custom chain which has been configured in `solrconfig.xml`. For example, in order to choose the "dedupe" chain described in a previous section, one can issue the following request: @@ -187,7 +179,6 @@ curl "http://localhost:8983/solr/gettingstarted/update/json?update.chain=dedupe& The above should dedupe the two identical documents and index only one of them. -[[UpdateRequestProcessors-Processor_Post-ProcessorRequestParameters]] === Processor & Post-Processor Request Parameters We can dynamically construct a custom update request processor chain using the `processor` and `post-processor` request parameters. Multiple processors can be specified as a comma-separated value for these two parameters. 
For example: @@ -232,7 +223,6 @@ curl "http://localhost:8983/solr/gettingstarted/update/json?processor=remove_bla In the first example, Solr will dynamically create a chain which has "signature" and "remove_blanks" as pre-processors to be executed only on the forwarding node where as in the second example, "remove_blanks" will be executed as a pre-processor and "signature" will be executed on the leader and replicas as a post-processor. -[[UpdateRequestProcessors-ConfiguringaCustomChainasaDefault]] === Configuring a Custom Chain as a Default We can also specify a custom chain to be used by default for all requests sent to specific update handlers instead of specifying the names in request parameters for each request. @@ -263,12 +253,10 @@ Alternately, one can achieve a similar effect using the "defaults" as shown in t ---- -[[UpdateRequestProcessors-UpdateRequestProcessorFactories]] == Update Request Processor Factories What follows are brief descriptions of the currently available update request processors. An `UpdateRequestProcessorFactory` can be integrated into an update chain in `solrconfig.xml` as necessary. You are strongly urged to examine the Javadocs for these classes; these descriptions are abridged snippets taken for the most part from the Javadocs. -[[UpdateRequestProcessors-GeneralUseUpdateProcessorFactories]] === General Use UpdateProcessorFactories {solr-javadocs}/solr-core/org/apache/solr/update/processor/AddSchemaFieldsUpdateProcessorFactory.html[AddSchemaFieldsUpdateProcessorFactory]:: This processor will dynamically add fields to the schema if an input document contains one or more fields that don't match any field or dynamic field in the schema. @@ -300,7 +288,6 @@ What follows are brief descriptions of the currently available update request pr {solr-javadocs}/solr-core/org/apache/solr/update/processor/UUIDUpdateProcessorFactory.html[UUIDUpdateProcessorFactory]:: An update processor that adds a newly generated UUID value to any document being added that does not already have a value in the specified field. -[[UpdateRequestProcessors-FieldMutatingUpdateProcessorFactoryDerivedFactories]] === FieldMutatingUpdateProcessorFactory Derived Factories These factories all provide functionality to _modify_ fields in a document as they're being indexed. When using any of these factories, please consult the {solr-javadocs}/solr-core/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.html[FieldMutatingUpdateProcessorFactory javadocs] for details on the common options they all support for configuring which fields are modified. @@ -349,7 +336,6 @@ These factories all provide functionality to _modify_ fields in a document as th {solr-javadocs}/solr-core/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.html[UniqFieldsUpdateProcessorFactory]:: Removes duplicate values found in fields matching the specified conditions. -[[UpdateRequestProcessors-UpdateProcessorFactoriesThatCanBeLoadedasPlugins]] === Update Processor Factories That Can Be Loaded as Plugins These processors are included in Solr releases as "contribs", and require additional jars loaded at runtime. See the README files associated with each contrib for details: @@ -364,7 +350,6 @@ The {solr-javadocs}/solr-uima/index.html[`uima`] contrib provides:: {solr-javadocs}/solr-uima/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.html[UIMAUpdateRequestProcessorFactory]::: Update document(s) to be indexed with UIMA extracted information. 
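Rather than editing `solrconfig.xml` by hand, individual factories like these can also be registered through the Config API. The following is only a sketch under the assumption that the Config API's `add-updateprocessor` command and the `fieldName` option of `UUIDUpdateProcessorFactory` behave as described in their respective documentation, and the names `add-uuid` and `uuid_s` are placeholders; verify against those references before relying on it:

[source,bash]
----
curl -X POST -H 'Content-Type: application/json' http://localhost:8983/solr/gettingstarted/config -d '{
  "add-updateprocessor": {
    "name": "add-uuid",
    "class": "solr.UUIDUpdateProcessorFactory",
    "fieldName": "uuid_s"
  }
}'
----

Once registered this way, the processor can be pulled into a request by name via the `processor` parameter, just like the dynamically constructed chains shown earlier.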
-[[UpdateRequestProcessors-UpdateProcessorFactoriesYouShouldNotModifyorRemove]] === Update Processor Factories You Should _Not_ Modify or Remove These are listed for completeness, but are part of the Solr infrastructure, particularly SolrCloud. Other than insuring you do _not_ remove them when modifying the update request handlers (or any copies you make), you will rarely, if ever, need to change these. @@ -377,11 +362,9 @@ These are listed for completeness, but are part of the Solr infrastructure, part {solr-javadocs}/solr-core/org/apache/solr/update/processor/RunUpdateProcessorFactory.html[RunUpdateProcessorFactory]:: Executes the update commands using the underlying UpdateHandler. Almost all processor chains should end with an instance of `RunUpdateProcessorFactory` unless the user is explicitly executing the update commands in an alternative custom `UpdateRequestProcessorFactory`. -[[UpdateRequestProcessors-UpdateProcessorsThatCanBeUsedatRuntime]] === Update Processors That Can Be Used at Runtime These Update processors do not need any configuration is your `solrconfig.xml` . They are automatically initialized when their name is added to the `processor` parameter. Multiple processors can be used by appending multiple processor names (comma separated) -[[UpdateRequestProcessors-TemplateUpdateProcessorFactory]] ==== TemplateUpdateProcessorFactory The `TemplateUpdateProcessorFactory` can be used to add new fields to documents based on a template pattern. diff --git a/solr/solr-ref-guide/src/updatehandlers-in-solrconfig.adoc b/solr/solr-ref-guide/src/updatehandlers-in-solrconfig.adoc index 040da8626db..43314574bbb 100644 --- a/solr/solr-ref-guide/src/updatehandlers-in-solrconfig.adoc +++ b/solr/solr-ref-guide/src/updatehandlers-in-solrconfig.adoc @@ -27,12 +27,10 @@ The settings in this section are configured in the `` element in ---- -[[UpdateHandlersinSolrConfig-Commits]] == Commits Data sent to Solr is not searchable until it has been _committed_ to the index. The reason for this is that in some cases commits can be slow and they should be done in isolation from other possible commit requests to avoid overwriting data. So, it's preferable to provide control over when data is committed. Several options are available to control the timing of commits. -[[UpdateHandlersinSolrConfig-commitandsoftCommit]] === commit and softCommit In Solr, a `commit` is an action which asks Solr to "commit" those changes to the Lucene index files. By default commit actions result in a "hard commit" of all the Lucene index files to stable storage (disk). When a client includes a `commit=true` parameter with an update request, this ensures that all index segments affected by the adds & deletes on an update are written to disk as soon as index updates are completed. @@ -41,7 +39,6 @@ If an additional flag `softCommit=true` is specified, then Solr performs a 'soft For more information about Near Real Time operations, see <>. -[[UpdateHandlersinSolrConfig-autoCommit]] === autoCommit These settings control how often pending updates will be automatically pushed to the index. An alternative to `autoCommit` is to use `commitWithin`, which can be defined when making the update request to Solr (i.e., when pushing documents), or in an update RequestHandler. @@ -77,7 +74,6 @@ You can also specify 'soft' autoCommits in the same way that you can specify 'so ---- -[[UpdateHandlersinSolrConfig-commitWithin]] === commitWithin The `commitWithin` settings allow forcing document commits to happen in a defined time period. 
This is used most frequently with <>, and for that reason the default is to perform a soft commit. This does not, however, replicate new documents to slave servers in a master/slave environment. If that's a requirement for your implementation, you can force a hard commit by adding a parameter, as in this example: @@ -91,7 +87,6 @@ The `commitWithin` settings allow forcing document commits to happen in a define With this configuration, when you call `commitWithin` as part of your update message, it will automatically perform a hard commit every time. -[[UpdateHandlersinSolrConfig-EventListeners]] == Event Listeners The UpdateHandler section is also where update-related event listeners can be configured. These can be triggered to occur after any commit (`event="postCommit"`) or only after optimize commands (`event="postOptimize"`). @@ -113,7 +108,6 @@ Any arguments to pass to the program. The default is none. `env`:: Any environment variables to set. The default is none. -[[UpdateHandlersinSolrConfig-TransactionLog]] == Transaction Log As described in the section <>, a transaction log is required for that feature. It is configured in the `updateHandler` section of `solrconfig.xml`. @@ -127,7 +121,7 @@ Realtime Get currently relies on the update log feature, which is enabled by def ---- -Three additional expert-level configuration settings affect indexing performance and how far a replica can fall behind on updates before it must enter into full recovery - see the section on <> for more information: +Three additional expert-level configuration settings affect indexing performance and how far a replica can fall behind on updates before it must enter into full recovery - see the section on <> for more information: `numRecordsToKeep`:: The number of update records to keep per log. The default is `100`. diff --git a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc index 5ff8a2827ee..e6b51753734 100644 --- a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc +++ b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc @@ -20,15 +20,14 @@ Once you have indexed the content you need in your Solr index, you will want to start thinking about your strategy for dealing with changes to those documents. Solr supports three approaches to updating documents that have only partially changed. -The first is __<>__. This approach allows changing only one or more fields of a document without having to re-index the entire document. +The first is _<>_. This approach allows changing only one or more fields of a document without having to re-index the entire document. -The second approach is known as __<>__. This approach is similar to atomic updates (is a subset of atomic updates in some sense), but can be used only for updating single valued non-indexed and non-stored docValue-based numeric fields. +The second approach is known as _<>_. This approach is similar to atomic updates (is a subset of atomic updates in some sense), but can be used only for updating single valued non-indexed and non-stored docValue-based numeric fields. -The third approach is known as _<>_ or __optimistic locking__. It is a feature of many NoSQL databases, and allows conditional updating a document based on its version. This approach includes semantics and rules for how to deal with version matches or mis-matches. +The third approach is known as _<>_ or _optimistic locking_. 
It is a feature of many NoSQL databases, and allows conditional updating a document based on its version. This approach includes semantics and rules for how to deal with version matches or mis-matches. Atomic Updates (and in-place updates) and Optimistic Concurrency may be used as independent strategies for managing changes to documents, or they may be combined: you can use optimistic concurrency to conditionally apply an atomic update. -[[UpdatingPartsofDocuments-AtomicUpdates]] == Atomic Updates Solr supports several modifiers that atomically update values of a document. This allows updating only specific fields, which can help speed indexing processes in an environment where speed of index additions is critical to the application. @@ -52,7 +51,6 @@ Removes all occurrences of the specified regex from a multiValued field. May be `inc`:: Increments a numeric value by a specific amount. Must be specified as a single numeric value. -[[UpdatingPartsofDocuments-FieldStorage]] === Field Storage The core functionality of atomically updating a document requires that all fields in your schema must be configured as stored (`stored="true"`) or docValues (`docValues="true"`) except for fields which are `` destinations, which must be configured as `stored="false"`. Atomic updates are applied to the document represented by the existing stored field values. All data in copyField destinations fields must originate from ONLY copyField sources. @@ -61,8 +59,7 @@ If `` destinations are configured as stored, then Solr will attempt There are other kinds of derived fields that must also be set so they aren't stored. Some spatial field types use derived fields. Examples of this are solr.BBoxField and solr.LatLonType. CurrencyFieldType also uses derived fields. These types create additional fields which are normally specified by a dynamic field definition. That dynamic field definition must be not stored, or indexing will fail. -[[UpdatingPartsofDocuments-Example]] -=== Example +=== Example Updating Part of a Document If the following document exists in our collection: @@ -102,7 +99,6 @@ The resulting document in our collection will be: } ---- -[[UpdatingPartsofDocuments-In-PlaceUpdates]] == In-Place Updates In-place updates are very similar to atomic updates; in some sense, this is a subset of atomic updates. In regular atomic updates, the entire document is re-indexed internally during the application of the update. However, in this approach, only the fields to be updated are affected and the rest of the documents are not re-indexed internally. Hence, the efficiency of updating in-place is unaffected by the size of the documents that are updated (i.e., number of fields, size of fields, etc.). Apart from these internal differences, there is no functional difference between atomic updates and in-place updates. @@ -121,8 +117,7 @@ Set or replace the field value(s) with the specified value(s). May be specified `inc`:: Increments a numeric value by a specific amount. Must be specified as a single numeric value. 
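As a quick sketch of what such a request looks like on the wire (the document ID is a placeholder; `price` and `popularity` follow the schema snippet in the example that follows):

[source,bash]
----
# when the fields meet the in-place criteria above, only those docValues are
# updated and the document is not re-indexed
curl -X POST -H 'Content-Type: application/json' \
  'http://localhost:8983/solr/techproducts/update?commit=true' \
  --data-binary '[{"id": "mydoc", "price": {"set": 99}, "popularity": {"inc": 20}}]'
----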
-[[UpdatingPartsofDocuments-Example.1]] -=== Example +=== In-Place Update Example If the price and popularity fields are defined in the schema as: @@ -169,17 +164,16 @@ The resulting document in our collection will be: } ---- -[[UpdatingPartsofDocuments-OptimisticConcurrency]] == Optimistic Concurrency Optimistic Concurrency is a feature of Solr that can be used by client applications which update/replace documents to ensure that the document they are replacing/updating has not been concurrently modified by another client application. This feature works by requiring a `\_version_` field on all documents in the index, and comparing that to a `\_version_` specified as part of the update command. By default, Solr's Schema includes a `\_version_` field, and this field is automatically added to each new document. In general, using optimistic concurrency involves the following work flow: -1. A client reads a document. In Solr, one might retrieve the document with the `/get` handler to be sure to have the latest version. -2. A client changes the document locally. -3. The client resubmits the changed document to Solr, for example, perhaps with the `/update` handler. -4. If there is a version conflict (HTTP error code 409), the client starts the process over. +. A client reads a document. In Solr, one might retrieve the document with the `/get` handler to be sure to have the latest version. +. A client changes the document locally. +. The client resubmits the changed document to Solr, for example, perhaps with the `/update` handler. +. If there is a version conflict (HTTP error code 409), the client starts the process over. When the client resubmits a changed document to Solr, the `\_version_` can be included with the update to invoke optimistic concurrency control. Specific semantics are used to define when the document should be updated or when to report a conflict. @@ -233,7 +227,6 @@ $ curl 'http://localhost:8983/solr/techproducts/query?q=*:*&fl=id,_version_' For more information, please also see https://www.youtube.com/watch?v=WYVM6Wz-XTw[Yonik Seeley's presentation on NoSQL features in Solr 4] from Apache Lucene EuroCon 2012. -[[UpdatingPartsofDocuments-DocumentCentricVersioningConstraints]] == Document Centric Versioning Constraints Optimistic Concurrency is extremely powerful, and works very efficiently because it uses an internally assigned, globally unique values for the `\_version_` field. However, In some situations users may want to configure their own document specific version field, where the version values are assigned on a per-document basis by an external system, and have Solr reject updates that attempt to replace a document with an "older" version. In situations like this the {solr-javadocs}/solr-core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[`DocBasedVersionConstraintsProcessorFactory`] can be useful. @@ -252,9 +245,7 @@ Once configured, this update processor will reject (HTTP error code 409) any att .versionField vs `\_version_` [IMPORTANT] ==== - The `\_version_` field used by Solr for its normal optimistic concurrency also has important semantics in how updates are distributed to replicas in SolrCloud, and *MUST* be assigned internally by Solr. Users can not re-purpose that field and specify it as the `versionField` for use in the `DocBasedVersionConstraintsProcessorFactory` configuration. 
- ==== `DocBasedVersionConstraintsProcessorFactory` supports two additional configuration params which are optional: diff --git a/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc b/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc index 00b825adf36..24a7ac926e2 100644 --- a/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc +++ b/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc @@ -28,7 +28,6 @@ The steps outlined on this page assume you use the default service name of "```s ==== -[[UpgradingaSolrCluster-PlanningYourUpgrade]] == Planning Your Upgrade Here is a checklist of things you need to prepare before starting the upgrade process: @@ -49,19 +48,16 @@ If you are upgrading from an installation of Solr 5.x or later, these values can You should now be ready to upgrade your cluster. Please verify this process in a test / staging cluster before doing it in production. -[[UpgradingaSolrCluster-UpgradeProcess]] == Upgrade Process The approach we recommend is to perform the upgrade of each Solr node, one-by-one. In other words, you will need to stop a node, upgrade it to the new version of Solr, and restart it before moving on to the next node. This means that for a short period of time, there will be a mix of "Old Solr" and "New Solr" nodes running in your cluster. We also assume that you will point the new Solr node to your existing Solr home directory where the Lucene index files are managed for each collection on the node. This means that you won't need to move any index files around to perform the upgrade. -[[UpgradingaSolrCluster-Step1_StopSolr]] === Step 1: Stop Solr Begin by stopping the Solr node you want to upgrade. After stopping the node, if using a replication, (ie: collections with replicationFactor > 1) verify that all leaders hosted on the downed node have successfully migrated to other replicas; you can do this by visiting the <>. If not using replication, then any collections with shards hosted on the downed node will be temporarily off-line. -[[UpgradingaSolrCluster-Step2_InstallSolrasaService]] === Step 2: Install Solr as a Service Please follow the instructions to install Solr as a Service on Linux documented at <>. Use the `-n` parameter to avoid automatic start of Solr by the installer script. You need to update the `/etc/default/solr.in.sh` include file in the next step to complete the upgrade process. @@ -74,7 +70,6 @@ If you have a `/var/solr/solr.in.sh` file for your existing Solr install, runnin ==== -[[UpgradingaSolrCluster-Step3_SetEnvironmentVariableOverrides]] === Step 3: Set Environment Variable Overrides Open `/etc/default/solr.in.sh` with a text editor and verify that the following variables are set correctly, or add them bottom of the include file as needed: @@ -84,13 +79,10 @@ Open `/etc/default/solr.in.sh` with a text editor and verify that the following Make sure the user you plan to own the Solr process is the owner of the `SOLR_HOME` directory. For instance, if you plan to run Solr as the "solr" user and `SOLR_HOME` is `/var/solr/data`, then you would do: `sudo chown -R solr: /var/solr/data` -[[UpgradingaSolrCluster-Step4_StartSolr]] === Step 4: Start Solr You are now ready to start the upgraded Solr node by doing: `sudo service solr start`. The upgraded instance will join the existing cluster because you're using the same `SOLR_HOME`, `SOLR_PORT`, and `SOLR_HOST` settings used by the old Solr node; thus, the new server will look like the old node to the running cluster. 
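For example, on an installation using the default `solr` service name and log location mentioned below, the restart and a quick log check look like this (a sketch):

[source,bash]
----
sudo service solr start
# watch the node come up and rejoin the cluster
tail -f /var/solr/logs/solr.log
----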
Be sure to look in `/var/solr/logs/solr.log` for errors during startup. - -[[UpgradingaSolrCluster-Step5_RunHealthcheck]] === Step 5: Run Healthcheck You should run the Solr *healthcheck* command for all collections that are hosted on the upgraded node before proceeding to upgrade the next node in your cluster. For instance, if the newly upgraded node hosts a replica for the *MyDocuments* collection, then you can run the following command (replace ZK_HOST with the ZooKeeper connection string): diff --git a/solr/solr-ref-guide/src/upgrading-solr.adoc b/solr/solr-ref-guide/src/upgrading-solr.adoc index e41b93b6bc6..a1db074f31f 100644 --- a/solr/solr-ref-guide/src/upgrading-solr.adoc +++ b/solr/solr-ref-guide/src/upgrading-solr.adoc @@ -20,7 +20,6 @@ If you are already using Solr 6.5, Solr 6.6 should not present any major problems. However, you should review the {solr-javadocs}/changes/Changes.html[`CHANGES.txt`] file found in your Solr package for changes and updates that may effect your existing implementation. Detailed steps for upgrading a Solr cluster can be found in the appendix: <>. -[[UpgradingSolr-Upgradingfrom6.5.x]] == Upgrading from 6.5.x * Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed. @@ -29,7 +28,6 @@ If you are already using Solr 6.5, Solr 6.6 should not present any major problem * ZooKeeper dependency has been upgraded from 3.4.6 to 3.4.10. -[[UpgradingSolr-Upgradingfromearlier6.xversions]] == Upgrading from earlier 6.x versions * If you use historical dates, specifically on or before the year 1582, you should re-index after upgrading to this version. @@ -47,12 +45,11 @@ If you are already using Solr 6.5, Solr 6.6 should not present any major problem ** The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr. * A new highlighter named UnifiedHighlighter has been added. You are encouraged to try out the UnifiedHighlighter by setting `hl.method=unified` and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the original Highlighter. That said, some options aren't supported yet. It will get more features in time, especially with your input. See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. `hl.useFastVectorHighlighter` is now considered deprecated in lieu of `hl.method=fastVector`. * The <> now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files. -* The <> now supports leading wildcards. Beware of its possible heaviness, users are encouraged to use ReversedWildcardFilter in index time analysis. +* The <> now supports leading wildcards. Beware of its possible heaviness, users are encouraged to use ReversedWildcardFilter in index time analysis. * The JMX metric "avgTimePerRequest" (and the corresponding metric in the metrics API for each handler) used to be a simple non-decaying average based on total cumulative time and the number of requests. 
New Codahale Metrics implementation applies exponential decay to this value, which heavily biases the average towards the last 5 minutes. * Index-time boosts are now deprecated. As a replacement, index-time scoring factors should be indexed in a separate field and combined with the query score using a function query. These boosts will be removed in Solr 7.0. * Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to facet rather than map_reduce. There have also been changes to the SQL aggregate response and some SQL syntax changes. Consult the <> documentation for full details. -[[UpgradingSolr-Upgradingfrom5.5.x]] == Upgrading from 5.5.x * The deprecated `SolrServer` and subclasses have been removed, use <> instead. @@ -60,7 +57,7 @@ If you are already using Solr 6.5, Solr 6.6 should not present any major problem * `SolrClient.shutdown()` has been removed, use {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/SolrClient.html[`SolrClient.close()`] instead. * The deprecated `zkCredientialsProvider` element in `solrcloud` section of `solr.xml` is now removed. Use the correct spelling (<>) instead. * Internal/expert - `ResultContext` was significantly changed and expanded to allow for multiple full query results (`DocLists`) per Solr request. `TransformContext` was rendered redundant and was removed. See https://issues.apache.org/jira/browse/SOLR-7957[SOLR-7957] for details. -* Several changes have been made regarding the "<>" used in Solr, in order to provide better default behavior for new users. There are 3 key impacts of these changes on existing users who upgrade: +* Several changes have been made regarding the "<>" used in Solr, in order to provide better default behavior for new users. There are 3 key impacts of these changes on existing users who upgrade: ** `DefaultSimilarityFactory` has been removed. If you currently have `DefaultSimilarityFactory` explicitly referenced in your `schema.xml`, edit your config to use the functionally identical `ClassicSimilarityFactory`. See https://issues.apache.org/jira/browse/SOLR-8239[SOLR-8239] for more details. ** The implicit default Similarity used when no `` is configured in `schema.xml` has been changed to `SchemaSimilarityFactory`. Users who wish to preserve back-compatible behavior should either explicitly configure `ClassicSimilarityFactory`, or ensure that the `luceneMatchVersion` for the collection is less then 6.0. See https://issues.apache.org/jira/browse/SOLR-8270[SOLR-8270] + http://SOLR-8271[SOLR-8271] for details. ** `SchemaSimilarityFactory` has been modified to use `BM25Similarity` as the default for `fieldTypes` that do not explicitly declare a Similarity. The legacy behavior of using `ClassicSimilarity` as the default will occur if the `luceneMatchVersion` for the collection is less then 6.0, or the `'defaultSimFromFieldType'` configuration option may be used to specify any default of your choosing. See https://issues.apache.org/jira/browse/SOLR-8261[SOLR-8261] + https://issues.apache.org/jira/browse/SOLR-8329[SOLR-8329] for more details. @@ -74,7 +71,6 @@ If you are already using Solr 6.5, Solr 6.6 should not present any major problem * <> no longer includes `DateUtil`. If for some reason you need to format or parse dates, simply use `Instant.format()` and `Instant.parse()`. * If you are using spatial4j, please upgrade to 0.6 and <> to replace `com.spatial4j.core` with `org.locationtech.spatial4j` . 
-[[UpgradingSolr-UpgradingfromOlderVersionsofSolr]] == Upgrading from Older Versions of Solr Users upgrading from older versions are strongly encouraged to consult {solr-javadocs}/changes/Changes.html[`CHANGES.txt`] for the details of _all_ changes since the version they are upgrading from. diff --git a/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc b/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc index 6a8ad9967c7..ff59d617b27 100644 --- a/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc +++ b/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc @@ -25,7 +25,6 @@ The recommended way to configure and use request handlers is with path based nam A single unified update request handler supports XML, CSV, JSON, and javabin update requests, delegating to the appropriate `ContentStreamLoader` based on the `Content-Type` of the <>. -[[UploadingDatawithIndexHandlers-UpdateRequestHandlerConfiguration]] == UpdateRequestHandler Configuration The default configuration file has the update request handler configured by default. @@ -35,12 +34,10 @@ The default configuration file has the update request handler configured by defa ---- -[[UploadingDatawithIndexHandlers-XMLFormattedIndexUpdates]] == XML Formatted Index Updates Index update commands can be sent as XML message to the update handler using `Content-type: application/xml` or `Content-type: text/xml`. -[[UploadingDatawithIndexHandlers-AddingDocuments]] === Adding Documents The XML schema recognized by the update handler for adding documents is very straightforward: @@ -84,11 +81,9 @@ If the document schema defines a unique key, then by default an `/update` operat If you have a unique key field, but you feel confident that you can safely bypass the uniqueness check (e.g., you build your indexes in batch, and your indexing code guarantees it never adds the same document more than once) you can specify the `overwrite="false"` option when adding your documents. -[[UploadingDatawithIndexHandlers-XMLUpdateCommands]] === XML Update Commands -[[UploadingDatawithIndexHandlers-CommitandOptimizeOperations]] -==== Commit and Optimize Operations +==== Commit and Optimize During Updates The `` operation writes all documents loaded since the last commit to one or more segment files on the disk. Before a commit has been issued, newly indexed content is not visible to searches. The commit operation opens a new searcher, and triggers any event listeners that have been configured. @@ -114,7 +109,6 @@ Here are examples of and using optional attributes: ---- -[[UploadingDatawithIndexHandlers-DeleteOperations]] ==== Delete Operations Documents can be deleted from the index in two ways. "Delete by ID" deletes the document with the specified ID, and can be used only if a UniqueID field has been defined in the schema. "Delete by Query" deletes all documents matching a specified query, although `commitWithin` is ignored for a Delete by Query. A single delete message can contain multiple delete operations. @@ -136,12 +130,10 @@ When using the Join query parser in a Delete By Query, you should use the `score ==== -[[UploadingDatawithIndexHandlers-RollbackOperations]] ==== Rollback Operations The rollback command rolls back all add and deletes made to the index since the last commit. It neither calls any event listeners nor creates a new searcher. Its syntax is simple: ``. 
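A brief sketch of both kinds of message, posted with `curl` as described in the next section (the ID and query values are placeholders):

[source,bash]
----
# delete by ID and by query in a single message
curl http://localhost:8983/solr/my_collection/update -H "Content-Type: text/xml" \
  --data-binary '<delete><id>doc-to-remove</id><query>category:discontinued</query></delete>'

# roll back all uncommitted adds and deletes
curl http://localhost:8983/solr/my_collection/update -H "Content-Type: text/xml" \
  --data-binary '<rollback/>'
----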
-[[UploadingDatawithIndexHandlers-UsingcurltoPerformUpdates]] === Using curl to Perform Updates You can use the `curl` utility to perform any of the above commands, using its `--data-binary` option to append the XML message to the `curl` command, and generating a HTTP POST request. For example: @@ -168,7 +160,7 @@ For posting XML messages contained in a file, you can use the alternative form: curl http://localhost:8983/solr/my_collection/update -H "Content-Type: text/xml" --data-binary @myfile.xml ---- -Short requests can also be sent using a HTTP GET command, if enabled in <> element, URL-encoding the request, as in the following. Note the escaping of "<" and ">": +Short requests can also be sent using a HTTP GET command, if enabled in <> element, URL-encoding the request, as in the following. Note the escaping of "<" and ">": [source,bash] ---- @@ -189,7 +181,6 @@ Responses from Solr take the form shown here: The status field will be non-zero in case of failure. -[[UploadingDatawithIndexHandlers-UsingXSLTtoTransformXMLIndexUpdates]] === Using XSLT to Transform XML Index Updates The UpdateRequestHandler allows you to index any arbitrary XML using the `` parameter to apply an https://en.wikipedia.org/wiki/XSLT[XSL transformation]. You must have an XSLT stylesheet in the `conf/xslt` directory of your <> that can transform the incoming data to the expected `` format, and use the `tr` parameter to specify the name of that stylesheet. @@ -250,23 +241,20 @@ You can also use the stylesheet in `XsltUpdateRequestHandler` to transform an in curl "http://localhost:8983/solr/my_collection/update?commit=true&tr=updateXml.xsl" -H "Content-Type: text/xml" --data-binary @myexporteddata.xml ---- -[[UploadingDatawithIndexHandlers-JSONFormattedIndexUpdates]] == JSON Formatted Index Updates Solr can accept JSON that conforms to a defined structure, or can accept arbitrary JSON-formatted documents. If sending arbitrarily formatted JSON, there are some additional parameters that need to be sent with the update request, described below in the section <>. -[[UploadingDatawithIndexHandlers-Solr-StyleJSON]] === Solr-Style JSON JSON formatted update requests may be sent to Solr's `/update` handler using `Content-Type: application/json` or `Content-Type: text/json`. JSON formatted updates can take 3 basic forms, described in depth below: -* <>, expressed as a top level JSON Object. To differentiate this from a set of commands, the `json.command=false` request parameter is required. -* <>, expressed as a top level JSON Array containing a JSON Object per document. -* <>, expressed as a top level JSON Object (aka: Map). +* <>, expressed as a top level JSON Object. To differentiate this from a set of commands, the `json.command=false` request parameter is required. +* <>, expressed as a top level JSON Array containing a JSON Object per document. +* <>, expressed as a top level JSON Object (aka: Map). 
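For instance, the first form sends one bare JSON object straight to `/update`; a sketch (the collection name and field values are placeholders):

[source,bash]
----
# a single document as a top-level JSON object; json.command=false marks it
# as a document rather than a set of update commands
curl -X POST -H 'Content-Type: application/json' \
  'http://localhost:8983/solr/my_collection/update?json.command=false&commit=true' \
  --data-binary '{"id": "doc1", "title": "a bare JSON object treated as one document"}'
----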
-[[UploadingDatawithIndexHandlers-AddingaSingleJSONDocument]] ==== Adding a Single JSON Document The simplest way to add Documents via JSON is to send each document individually as a JSON Object, using the `/update/json/docs` path: @@ -280,7 +268,6 @@ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/my_ }' ---- -[[UploadingDatawithIndexHandlers-AddingMultipleJSONDocuments]] ==== Adding Multiple JSON Documents Adding multiple documents at one time via JSON can be done via a JSON Array of JSON Objects, where each object represents a document: @@ -307,7 +294,6 @@ A sample JSON file is provided at `example/exampledocs/books.json` and contains curl 'http://localhost:8983/solr/techproducts/update?commit=true' --data-binary @example/exampledocs/books.json -H 'Content-type:application/json' ---- -[[UploadingDatawithIndexHandlers-SendingJSONUpdateCommands]] ==== Sending JSON Update Commands In general, the JSON update syntax supports all of the update commands that the XML update handler supports, through a straightforward mapping. Multiple commands, adding and deleting documents, may be contained in one message: @@ -377,7 +363,6 @@ You can also specify `\_version_` with each "delete": You can specify the version of deletes in the body of the update request as well. -[[UploadingDatawithIndexHandlers-JSONUpdateConveniencePaths]] === JSON Update Convenience Paths In addition to the `/update` handler, there are a few additional JSON specific request handler paths available by default in Solr, that implicitly override the behavior of some request parameters: @@ -395,13 +380,11 @@ In addition to the `/update` handler, there are a few additional JSON specific r The `/update/json` path may be useful for clients sending in JSON formatted update commands from applications where setting the Content-Type proves difficult, while the `/update/json/docs` path can be particularly convenient for clients that always want to send in documents – either individually or as a list – without needing to worry about the full JSON command syntax. -[[UploadingDatawithIndexHandlers-CustomJSONDocuments]] === Custom JSON Documents Solr can support custom JSON. This is covered in the section <>. -[[UploadingDatawithIndexHandlers-CSVFormattedIndexUpdates]] == CSV Formatted Index Updates CSV formatted update requests may be sent to Solr's `/update` handler using `Content-Type: application/csv` or `Content-Type: text/csv`. @@ -413,7 +396,6 @@ A sample CSV file is provided at `example/exampledocs/books.csv` that you can us curl 'http://localhost:8983/solr/my_collection/update?commit=true' --data-binary @example/exampledocs/books.csv -H 'Content-type:application/csv' ---- -[[UploadingDatawithIndexHandlers-CSVUpdateParameters]] === CSV Update Parameters The CSV handler allows the specification of many parameters in the URL in the form: `f._parameter_._optional_fieldname_=_value_` . @@ -498,7 +480,6 @@ Add the given offset (as an integer) to the `rowid` before adding it to the docu + Example: `rowidOffset=10` -[[UploadingDatawithIndexHandlers-IndexingTab-Delimitedfiles]] === Indexing Tab-Delimited files The same feature used to index CSV documents can also be easily used to index tab-delimited files (TSV files) and even handle backslash escaping rather than CSV encapsulation. 
@@ -517,7 +498,6 @@ This file could then be imported into Solr by setting the `separator` to tab (%0 curl 'http://localhost:8983/solr/my_collection/update/csv?commit=true&separator=%09&escape=%5c' --data-binary @/tmp/result.txt ---- -[[UploadingDatawithIndexHandlers-CSVUpdateConveniencePaths]] === CSV Update Convenience Paths In addition to the `/update` handler, there is an additional CSV specific request handler path available by default in Solr, that implicitly override the behavior of some request parameters: @@ -530,16 +510,14 @@ In addition to the `/update` handler, there is an additional CSV specific reques The `/update/csv` path may be useful for clients sending in CSV formatted update commands from applications where setting the Content-Type proves difficult. -[[UploadingDatawithIndexHandlers-NestedChildDocuments]] == Nested Child Documents -Solr indexes nested documents in blocks as a way to model documents containing other documents, such as a blog post parent document and comments as child documents -- or products as parent documents and sizes, colors, or other variations as child documents. At query time, the <> can search these relationships. In terms of performance, indexing the relationships between documents may be more efficient than attempting to do joins only at query time, since the relationships are already stored in the index and do not need to be computed. +Solr indexes nested documents in blocks as a way to model documents containing other documents, such as a blog post parent document and comments as child documents -- or products as parent documents and sizes, colors, or other variations as child documents. At query time, the <> can search these relationships. In terms of performance, indexing the relationships between documents may be more efficient than attempting to do joins only at query time, since the relationships are already stored in the index and do not need to be computed. -Nested documents may be indexed via either the XML or JSON data syntax (or using <> - but regardless of syntax, you must include a field that identifies the parent document as a parent; it can be any field that suits this purpose, and it will be used as input for the <>. +Nested documents may be indexed via either the XML or JSON data syntax (or using <> - but regardless of syntax, you must include a field that identifies the parent document as a parent; it can be any field that suits this purpose, and it will be used as input for the <>. To support nested documents, the schema must include an indexed/non-stored field `\_root_`. The value of that field is populated automatically and is the same for all documents in the block, regardless of the inheritance depth. -[[UploadingDatawithIndexHandlers-XMLExamples]] === XML Examples For example, here are two documents and their child documents: @@ -570,7 +548,6 @@ For example, here are two documents and their child documents: In this example, we have indexed the parent documents with the field `content_type`, which has the value "parentDocument". We could have also used a boolean field, such as `isParent`, with a value of "true", or any other similar approach. -[[UploadingDatawithIndexHandlers-JSONExamples]] === JSON Examples This example is equivalent to the XML example above, note the special `\_childDocuments_` key need to indicate the nested documents in JSON. 
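A minimal sketch of that JSON form, posted with curl (IDs, field values, and the collection name are placeholders; `content_type` marks the parent just as in the XML example):

[source,bash]
----
curl -X POST -H 'Content-Type: application/json' \
  'http://localhost:8983/solr/my_collection/update?commit=true' \
  --data-binary '[
    {
      "id": "1",
      "title": "Parent blog post",
      "content_type": "parentDocument",
      "_childDocuments_": [
        { "id": "1.1", "comments": "First comment, indexed as a child document" }
      ]
    }
  ]'
----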
diff --git a/solr/solr-ref-guide/src/uploading-data-with-solr-cell-using-apache-tika.adoc b/solr/solr-ref-guide/src/uploading-data-with-solr-cell-using-apache-tika.adoc index cdd9539f62a..1489d16d29e 100644 --- a/solr/solr-ref-guide/src/uploading-data-with-solr-cell-using-apache-tika.adoc +++ b/solr/solr-ref-guide/src/uploading-data-with-solr-cell-using-apache-tika.adoc @@ -26,8 +26,7 @@ If you want to supply your own `ContentHandler` for Solr to use, you can extend For more information on Solr's Extracting Request Handler, see https://wiki.apache.org/solr/ExtractingRequestHandler. -[[UploadingDatawithSolrCellusingApacheTika-KeyConcepts]] -== Key Concepts +== Key Solr Cell Concepts When using the Solr Cell framework, it is helpful to keep the following in mind: @@ -42,12 +41,9 @@ When using the Solr Cell framework, it is helpful to keep the following in mind: [TIP] ==== - While Apache Tika is quite powerful, it is not perfect and fails on some files. PDF files are particularly problematic, mostly due to the PDF format itself. In case of a failure processing any file, the `ExtractingRequestHandler` does not have a secondary mechanism to try to extract some text from the file; it will throw an exception and fail. - ==== -[[UploadingDatawithSolrCellusingApacheTika-TryingoutTikawiththeSolrtechproductsExample]] == Trying out Tika with the Solr techproducts Example You can try out the Tika framework using the `techproducts` example included in Solr. @@ -96,8 +92,7 @@ In this command, the `uprefix=attr_` parameter causes all generated fields that This command allows you to query the document using an attribute, as in: `\http://localhost:8983/solr/techproducts/select?q=attr_meta:microsoft`. -[[UploadingDatawithSolrCellusingApacheTika-InputParameters]] -== Input Parameters +== Solr Cell Input Parameters The table below describes the parameters accepted by the Extracting Request Handler. @@ -158,8 +153,6 @@ Prefixes all fields that are not defined in the schema with the given prefix. Th `xpath`:: When extracting, only return Tika XHTML content that satisfies the given XPath expression. See http://tika.apache.org/1.7/index.html for details on the format of Tika XHTML. See also http://wiki.apache.org/solr/TikaExtractOnlyExampleOutput. - -[[UploadingDatawithSolrCellusingApacheTika-OrderofOperations]] == Order of Operations Here is the order in which the Solr Cell framework, using the Extracting Request Handler and Tika, processes its input. @@ -169,7 +162,6 @@ Here is the order in which the Solr Cell framework, using the Extracting Request . Tika applies the mapping rules specified by `fmap.__source__=__target__` parameters. . If `uprefix` is specified, any unknown field names are prefixed with that value, else if `defaultField` is specified, any unknown fields are copied to the default field. -[[UploadingDatawithSolrCellusingApacheTika-ConfiguringtheSolrExtractingRequestHandler]] == Configuring the Solr ExtractingRequestHandler If you are not working with the supplied `sample_techproducts_configs` or `_default` <>, you must configure your own `solrconfig.xml` to know about the Jar's containing the `ExtractingRequestHandler` and its dependencies: @@ -216,7 +208,6 @@ The `tika.config` entry points to a file containing a Tika configuration. The `d * `EEEE, dd-MMM-yy HH:mm:ss zzz` * `EEE MMM d HH:mm:ss yyyy` -[[UploadingDatawithSolrCellusingApacheTika-Parserspecificproperties]] === Parser-Specific Properties Parsers used by Tika may have specific properties to govern how data is extracted. 
For instance, when using the Tika library from a Java program, the PDFParserConfig class has a method setSortByPosition(boolean) that can extract vertically oriented text. To access that method via configuration with the ExtractingRequestHandler, one can add the parseContext.config property to the solrconfig.xml file (see above) and then set properties in Tika's PDFParserConfig as below. Consult the Tika Java API documentation for configuration parameters that can be set for any particular parsers that require this level of control. @@ -232,14 +223,12 @@ Parsers used by Tika may have specific properties to govern how data is extracte ---- -[[UploadingDatawithSolrCellusingApacheTika-Multi-CoreConfiguration]] === Multi-Core Configuration For a multi-core configuration, you can specify `sharedLib='lib'` in the `` section of `solr.xml` and place the necessary jar files there. For more information about Solr cores, see <>. -[[UploadingDatawithSolrCellusingApacheTika-IndexingEncryptedDocumentswiththeExtractingUpdateRequestHandler]] == Indexing Encrypted Documents with the ExtractingUpdateRequestHandler The ExtractingRequestHandler will decrypt encrypted files and index their content if you supply a password in either `resource.password` on the request, or in a `passwordsFile` file. @@ -254,11 +243,9 @@ myFileName = myPassword .*\.pdf$ = myPdfPassword ---- -[[UploadingDatawithSolrCellusingApacheTika-Examples]] -== Examples +== Solr Cell Examples -[[UploadingDatawithSolrCellusingApacheTika-Metadata]] -=== Metadata +=== Metadata Created by Tika As mentioned before, Tika produces metadata about the document. Metadata describes different aspects of a document, such as the author's name, the number of pages, the file size, and so on. The metadata produced depends on the type of document submitted. For instance, PDFs have different metadata than Word documents do. @@ -277,17 +264,10 @@ The size of the stream in bytes. The content type of the stream, if available. -[IMPORTANT] -==== +IMPORTANT: We recommend that you try using the `extractOnly` option to discover which values Solr is setting for these metadata elements. -We recommend that you try using the `extractOnly` option to discover which values Solr is setting for these metadata elements. - -==== - -[[UploadingDatawithSolrCellusingApacheTika-ExamplesofUploadsUsingtheExtractingRequestHandler]] === Examples of Uploads Using the Extracting Request Handler -[[UploadingDatawithSolrCellusingApacheTika-CaptureandMapping]] ==== Capture and Mapping The command below captures `
<div>` tags separately, and then maps all the instances of that field to a dynamic field named `foo_t`. @@ -297,18 +277,6 @@ The command below captures `<div>
` tags separately, and then maps all the instan bin/post -c techproducts example/exampledocs/sample.html -params "literal.id=doc2&captureAttr=true&defaultField=_text_&fmap.div=foo_t&capture=div" ---- - -[[UploadingDatawithSolrCellusingApacheTika-Capture_Mapping]] -==== Capture & Mapping - -The command below captures `<div>
` tags separately and maps the field to a dynamic field named `foo_t`. - -[source,bash] ----- -bin/post -c techproducts example/exampledocs/sample.html -params "literal.id=doc3&captureAttr=true&defaultField=_text_&capture=div&fmap.div=foo_t" ----- - -[[UploadingDatawithSolrCellusingApacheTika-UsingLiteralstoDefineYourOwnMetadata]] ==== Using Literals to Define Your Own Metadata To add in your own metadata, pass in the literal parameter along with the file: @@ -318,8 +286,7 @@ To add in your own metadata, pass in the literal parameter along with the file: bin/post -c techproducts -params "literal.id=doc4&captureAttr=true&defaultField=text&capture=div&fmap.div=foo_t&literal.blah_s=Bah" example/exampledocs/sample.html ---- -[[UploadingDatawithSolrCellusingApacheTika-XPath]] -==== XPath +==== XPath Expressions The example below passes in an XPath expression to restrict the XHTML returned by Tika: @@ -328,7 +295,6 @@ The example below passes in an XPath expression to restrict the XHTML returned b bin/post -c techproducts -params "literal.id=doc5&captureAttr=true&defaultField=text&capture=div&fmap.div=foo_t&xpath=/xhtml:html/xhtml:body/xhtml:div//node()" example/exampledocs/sample.html ---- -[[UploadingDatawithSolrCellusingApacheTika-ExtractingDatawithoutIndexingIt]] === Extracting Data without Indexing It Solr allows you to extract data without indexing. You might want to do this if you're using Solr solely as an extraction server or if you're interested in testing Solr extraction. @@ -347,7 +313,6 @@ The output includes XML generated by Tika (and further escaped by Solr's XML) us bin/post -c techproducts -params "extractOnly=true&wt=ruby&indent=true" -out yes example/exampledocs/sample.html ---- -[[UploadingDatawithSolrCellusingApacheTika-SendingDocumentstoSolrwithaPOST]] == Sending Documents to Solr with a POST The example below streams the file as the body of the POST, which does not, then, provide information to Solr about the name of the file. @@ -357,7 +322,6 @@ The example below streams the file as the body of the POST, which does not, then curl "http://localhost:8983/solr/techproducts/update/extract?literal.id=doc6&defaultField=text&commit=true" --data-binary @example/exampledocs/sample.html -H 'Content-type:text/html' ---- -[[UploadingDatawithSolrCellusingApacheTika-SendingDocumentstoSolrwithSolrCellandSolrJ]] == Sending Documents to Solr with Solr Cell and SolrJ SolrJ is a Java client that you can use to add documents to the index, update the index, or query the index. You'll find more information on SolrJ in <>. diff --git a/solr/solr-ref-guide/src/using-javascript.adoc b/solr/solr-ref-guide/src/using-javascript.adoc index 25aabf8f745..d2247fb25e2 100644 --- a/solr/solr-ref-guide/src/using-javascript.adoc +++ b/solr/solr-ref-guide/src/using-javascript.adoc @@ -22,7 +22,7 @@ Using Solr from JavaScript clients is so straightforward that it deserves a spec HTTP requests can be sent to Solr using the standard `XMLHttpRequest` mechanism. -Out of the box, Solr can send <>, which are easily interpreted in JavaScript. Just add `wt=json` to the request URL to have responses sent as JSON. +Out of the box, Solr can send <>, which are easily interpreted in JavaScript. Just add `wt=json` to the request URL to have responses sent as JSON. 
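For example, such a request is easy to try from the command line before wiring it into `XMLHttpRequest` (a sketch; the collection name and query are placeholders):

[source,bash]
----
curl 'http://localhost:8983/solr/techproducts/select?q=video&wt=json&indent=true'
----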
For more information and an excellent example, read the SolJSON page on the Solr Wiki: diff --git a/solr/solr-ref-guide/src/using-jmx-with-solr.adoc b/solr/solr-ref-guide/src/using-jmx-with-solr.adoc index 241b30be155..77fd0ca1b2c 100644 --- a/solr/solr-ref-guide/src/using-jmx-with-solr.adoc +++ b/solr/solr-ref-guide/src/using-jmx-with-solr.adoc @@ -22,7 +22,6 @@ http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html[Ja Solr, like any other good citizen of the Java universe, can be controlled via a JMX interface. You can enable JMX support by adding lines to `solrconfig.xml`. You can use a JMX client, like jconsole, to connect with Solr. Check out the Wiki page http://wiki.apache.org/solr/SolrJmx for more information. You may also find the following overview of JMX to be useful: http://docs.oracle.com/javase/8/docs/technotes/guides/management/agent.html. -[[UsingJMXwithSolr-ConfiguringJMX]] == Configuring JMX JMX configuration is provided in `solrconfig.xml`. Please see the http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html[JMX Technology Home Page] for more details. @@ -36,7 +35,6 @@ Enabling/disabling JMX and securing access to MBeanServers is left up to the use ==== -[[UsingJMXwithSolr-ConfiguringanExistingMBeanServer]] === Configuring an Existing MBeanServer The command: @@ -48,7 +46,6 @@ The command: enables JMX support in Solr if and only if an existing MBeanServer is found. Use this if you want to configure JMX with JVM parameters. Remove this to disable exposing Solr configuration and statistics to JMX. If this is specified, Solr will try to list all available MBeanServers and use the first one to register MBeans. -[[UsingJMXwithSolr-ConfiguringanExistingMBeanServerwithagentId]] === Configuring an Existing MBeanServer with agentId The command: @@ -60,7 +57,6 @@ The command: enables JMX support in Solr if and only if an existing MBeanServer is found matching the given agentId. If multiple servers are found, the first one is used. If none is found, an exception is raised and depending on the configuration, Solr may refuse to start. -[[UsingJMXwithSolr-ConfiguringaNewMBeanServer]] === Configuring a New MBeanServer The command: @@ -72,8 +68,7 @@ The command: creates a new MBeanServer exposed for remote monitoring at the specific service URL. If the JMXConnectorServer can't be started (probably because the serviceUrl is bad), an exception is thrown. -[[UsingJMXwithSolr-Example]] -==== Example +==== MBean Server Example Solr's `sample_techproducts_configs` config set uses the simple `` configuration option. If you start the example with the necessary JVM system properties to launch an internal MBeanServer, Solr will register with it and you can connect using a tool like `jconsole`: @@ -87,7 +82,6 @@ bin/solr -e techproducts -Dcom.sun.management.jmxremote 3. Connect to the "`start.jar`" shown in the list of local processes. 4. Switch to the "MBeans" tab. You should be able to see "`solr/techproducts`" listed there, at which point you can drill down and see details of every solr plugin. -[[UsingJMXwithSolr-ConfiguringaRemoteConnectiontoSolrJMX]] === Configuring a Remote Connection to Solr JMX If you need to attach a JMX-enabled Java profiling tool, such as JConsole or VisualVM, to a remote Solr server, then you need to enable remote JMX access when starting the Solr server. Simply change the `ENABLE_REMOTE_JMX_OPTS` property in the include file to true. You’ll also need to choose a port for the JMX RMI connector to bind to, such as 18983. 
For example, if your Solr include script sets: @@ -118,7 +112,5 @@ http://docs.oracle.com/javase/8/docs/technotes/guides/management/agent.html [IMPORTANT] ==== - Making JMX connections into machines running behind NATs (e.g. Amazon's EC2 service) is not a simple task. The `java.rmi.server.hostname` system property may help, but running `jconsole` on the server itself and using a remote desktop is often the simplest solution. See http://web.archive.org/web/20130525022506/http://jmsbrdy.com/monitoring-java-applications-running-on-ec2-i. - ==== diff --git a/solr/solr-ref-guide/src/using-python.adoc b/solr/solr-ref-guide/src/using-python.adoc index 1e8045f34b3..84a7b4cbaec 100644 --- a/solr/solr-ref-guide/src/using-python.adoc +++ b/solr/solr-ref-guide/src/using-python.adoc @@ -18,9 +18,8 @@ // specific language governing permissions and limitations // under the License. -Solr includes an output format specifically for <>, but <> is a little more robust. +Solr includes an output format specifically for <>, but <> is a little more robust. -[[UsingPython-SimplePython]] == Simple Python Making a query is a simple matter. First, tell Python you will need to make HTTP connections. @@ -50,7 +49,6 @@ for document in response['response']['docs']: print " Name =", document['name'] ---- -[[UsingPython-PythonwithJSON]] == Python with JSON JSON is a more robust response format, but you will need to add a Python package in order to use it. At a command line, install the simplejson package like this: diff --git a/solr/solr-ref-guide/src/using-solr-from-ruby.adoc b/solr/solr-ref-guide/src/using-solr-from-ruby.adoc index ef5454c5b15..0b7033604b9 100644 --- a/solr/solr-ref-guide/src/using-solr-from-ruby.adoc +++ b/solr/solr-ref-guide/src/using-solr-from-ruby.adoc @@ -18,7 +18,7 @@ // specific language governing permissions and limitations // under the License. -Solr has an optional Ruby response format that extends the <> to allow the response to be safely eval'd by Ruby's interpreter +Solr has an optional Ruby response format that extends the <> to allow the response to be safely eval'd by Ruby's interpreter This Ruby response format differs from JSON in the following ways: diff --git a/solr/solr-ref-guide/src/using-solrj.adoc b/solr/solr-ref-guide/src/using-solrj.adoc index 4788ea2213f..9ac5acc96b9 100644 --- a/solr/solr-ref-guide/src/using-solrj.adoc +++ b/solr/solr-ref-guide/src/using-solrj.adoc @@ -45,7 +45,6 @@ SolrClient solr = new CloudSolrClient.Builder().withSolrUrl("http://localhost:89 Once you have a `SolrClient`, you can use it by calling methods like `query()`, `add()`, and `commit()`. -[[UsingSolrJ-BuildingandRunningSolrJApplications]] == Building and Running SolrJ Applications The SolrJ API is included with Solr, so you do not have to download or install anything else. However, in order to build and run applications that use SolrJ, you have to add some libraries to the classpath. @@ -69,7 +68,6 @@ You can sidestep a lot of the messing around with the JAR files by using Maven i If you are worried about the SolrJ libraries expanding the size of your client application, you can use a code obfuscator like http://proguard.sourceforge.net/[ProGuard] to remove APIs that you are not using. -[[UsingSolrJ-SpecifyingSolrUrl]] == Specifying Solr Base URLs Most `SolrClient` implementations (with the notable exception of `CloudSolrClient`) require users to specify one or more Solr base URLs, which the client then uses to send HTTP requests to Solr. 
The path users include on the base URL they provide has an effect on the behavior of the created client from that point on. @@ -77,7 +75,6 @@ Most `SolrClient` implementations (with the notable exception of `CloudSolrClien . A URL with a path pointing to a specific core or collection (e.g. `http://hostname:8983/solr/core1`). When a core or collection is specified in the base URL, subsequent requests made with that client are not required to re-specify the affected collection. However, the client is limited to sending requests to that core/collection, and can not send requests to any others. . A URL with a generic path pointing to the root Solr path (e.g. `http://hostname:8983/solr`). When no core or collection is specified in the base URL, requests can be made to any core/collection, but the affected core/collection must be specified on all requests. -[[UsingSolrJ-SettingXMLResponseParser]] == Setting XMLResponseParser SolrJ uses a binary format, rather than XML, as its default response format. If you are trying to mix Solr and SolrJ versions where one is version 1.x and the other is 3.x or later, then you MUST use the XML response parser. The binary format changed in 3.x, and the two javabin versions are entirely incompatible. The following code will make this change: @@ -87,7 +84,6 @@ SolrJ uses a binary format, rather than XML, as its default response format. If solr.setParser(new XMLResponseParser()); ---- -[[UsingSolrJ-PerformingQueries]] == Performing Queries Use `query()` to have Solr search for results. You have to pass a `SolrQuery` object that describes the query, and you will get back a QueryResponse (from the `org.apache.solr.client.solrj.response` package). @@ -132,7 +128,6 @@ The `QueryResponse` is a collection of documents that satisfy the query paramete SolrDocumentList list = response.getResults(); ---- -[[UsingSolrJ-IndexingDocuments]] == Indexing Documents Other operations are just as simple. To index (add) a document, all you need to do is create a `SolrInputDocument` and pass it along to the `SolrClient` 's `add()` method. This example assumes that the SolrClient object called 'solr' is already created based on the examples shown earlier. @@ -150,7 +145,6 @@ UpdateResponse response = solr.add(document); solr.commit(); ---- -[[UsingSolrJ-UploadingContentinXMLorBinaryFormats]] === Uploading Content in XML or Binary Formats SolrJ lets you upload content in binary format instead of the default XML format. Use the following code to upload using binary format, which is the same format SolrJ uses to fetch results. If you are trying to mix Solr and SolrJ versions where one is version 1.x and the other is 3.x or later, then you MUST stick with the XML request writer. The binary format changed in 3.x, and the two javabin versions are entirely incompatible. @@ -160,12 +154,10 @@ SolrJ lets you upload content in binary format instead of the default XML format solr.setRequestWriter(new BinaryRequestWriter()); ---- -[[UsingSolrJ-UsingtheConcurrentUpdateSolrClient]] === Using the ConcurrentUpdateSolrClient When implementing java applications that will be bulk loading a lot of documents at once, {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.html[`ConcurrentUpdateSolrClient`] is an alternative to consider instead of using `HttpSolrClient`. The `ConcurrentUpdateSolrClient` buffers all added documents and writes them into open HTTP connections. This class is thread safe. 
Although any SolrClient request can be made with this implementation, it is only recommended to use the `ConcurrentUpdateSolrClient` for `/update` requests. -[[UsingSolrJ-EmbeddedSolrServer]] == EmbeddedSolrServer The {solr-javadocs}/solr-core/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.html[`EmbeddedSolrServer`] class provides an implementation of the `SolrClient` client API talking directly to an micro-instance of Solr running directly in your Java application. This embedded approach is not recommended in most cases and fairly limited in the set of features it supports – in particular it can not be used with <> or <>. `EmbeddedSolrServer` exists primarily to help facilitate testing. diff --git a/solr/solr-ref-guide/src/using-zookeeper-to-manage-configuration-files.adoc b/solr/solr-ref-guide/src/using-zookeeper-to-manage-configuration-files.adoc index 3166e1c3d5b..31b49f2ba0a 100644 --- a/solr/solr-ref-guide/src/using-zookeeper-to-manage-configuration-files.adoc +++ b/solr/solr-ref-guide/src/using-zookeeper-to-manage-configuration-files.adoc @@ -26,7 +26,6 @@ These files are uploaded in either of the following cases: * When you create a collection using the `bin/solr` script. * Explicitly upload a configuration set to ZooKeeper. -[[UsingZooKeepertoManageConfigurationFiles-StartupBootstrap]] == Startup Bootstrap When you try SolrCloud for the first time using the `bin/solr -e cloud`, the related configset gets uploaded to ZooKeeper automatically and is linked with the newly created collection. @@ -49,15 +48,9 @@ The create command will upload a copy of the `_default` configuration directory Once a configuration directory has been uploaded to ZooKeeper, you can update them using the <> -[IMPORTANT] -==== - -It's a good idea to keep these files under version control. - -==== +IMPORTANT: It's a good idea to keep these files under version control. -[[UsingZooKeepertoManageConfigurationFiles-UploadingConfigurationFilesusingbin_solrorSolrJ]] == Uploading Configuration Files using bin/solr or SolrJ In production situations, <> can also be uploaded to ZooKeeper independent of collection creation using either Solr's <> or the {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/impl/CloudSolrClient.html[CloudSolrClient.uploadConfig] java method. @@ -71,21 +64,19 @@ bin/solr zk upconfig -n -d > for instructions. +If you will share the same ZooKeeper instance with other applications you should use a _chroot_ in ZooKeeper. Please see <> for instructions. There are certain configuration files containing cluster wide configuration. Since some of these are crucial for the cluster to function properly, you may need to upload such files to ZooKeeper before starting your Solr cluster for the first time. Examples of such configuration files (not exhaustive) are `solr.xml`, `security.json` and `clusterprops.json`. diff --git a/solr/solr-ref-guide/src/v2-api.adoc b/solr/solr-ref-guide/src/v2-api.adoc index 6906b1c2210..c142d5ffb32 100644 --- a/solr/solr-ref-guide/src/v2-api.adoc +++ b/solr/solr-ref-guide/src/v2-api.adoc @@ -34,7 +34,6 @@ The old API and the v2 API differ in three principle ways: . Endpoint structure: The v2 API endpoint structure has been rationalized and regularized. . Documentation: The v2 APIs are self-documenting: append `/_introspect` to any valid v2 API path and the API specification will be returned in JSON format. 
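For instance, against the `gettingstarted` collection used in the examples later on this page, the specification can be fetched like this (a sketch):

[source,bash]
----
curl 'http://localhost:8983/v2/c/gettingstarted/_introspect'

# narrow the output to a single HTTP method
curl 'http://localhost:8983/v2/c/gettingstarted/_introspect?method=POST'
----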
-[[v2API-v2APIPathPrefixes]] == v2 API Path Prefixes Following are some v2 API URL paths and path prefixes, along with some of the operations that are supported at these paths and their sub-paths. @@ -57,7 +56,6 @@ Following are some v2 API URL paths and path prefixes, along with some of the op |`/v2/c/.system/blob` |Upload and download blobs and metadata. |=== -[[v2API-Introspect]] == Introspect Append `/_introspect` to any valid v2 API path and the API specification will be returned in JSON format. @@ -72,7 +70,6 @@ Most endpoints support commands provided in a body sent via POST. To limit the i `\http://localhost:8983/v2/c/gettingstarted/_introspect?method=POST&command=modify` -[[v2API-InterpretingtheIntrospectOutput]] === Interpreting the Introspect Output Example : `\http://localhost:8983/v2/c/gettingstarted/get/_introspect` @@ -154,13 +151,11 @@ Example of introspect for a POST API: `\http://localhost:8983/v2/c/gettingstarte "/c/gettingstarted/update":["POST"]}, [... more sub-paths ...] - } ---- The `"commands"` section in the above example has one entry for each command supported at this endpoint. The key is the command name and the value is a json object describing the command structure using JSON schema (see http://json-schema.org/ for a description). -[[v2API-InvocationExamples]] == Invocation Examples For the "gettingstarted" collection, set the replication factor and whether to automatically add replicas (see above for the introspect output for the `"modify"` command used here): diff --git a/solr/solr-ref-guide/src/velocity-response-writer.adoc b/solr/solr-ref-guide/src/velocity-response-writer.adoc index 424a033bb19..0101030ecc9 100644 --- a/solr/solr-ref-guide/src/velocity-response-writer.adoc +++ b/solr/solr-ref-guide/src/velocity-response-writer.adoc @@ -42,7 +42,6 @@ The above example shows the optional initialization and custom tool parameters u == Configuration & Usage -[[VelocityResponseWriter-VelocityResponseWriterinitializationparameters]] === VelocityResponseWriter Initialization Parameters `template.base.dir`:: @@ -66,7 +65,6 @@ External "tools" can be specified as list of string name/value (tool name / clas + A custom registered tool can override the built-in context objects with the same name, except for `$request`, `$response`, `$page`, and `$debug` (these tools are designed to not be overridden). -[[VelocityResponseWriter-VelocityResponseWriterrequestparameters]] === VelocityResponseWriter Request Parameters `v.template`:: @@ -102,7 +100,6 @@ Resource bundles can be added by providing a JAR file visible by the SolrResourc `v.template._template_name_`:: When the "params" resource loader is enabled, templates can be specified as part of the Solr request. -[[VelocityResponseWriter-VelocityResponseWritercontextobjects]] === VelocityResponseWriter Context Objects // TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed diff --git a/solr/solr-ref-guide/src/velocity-search-ui.adoc b/solr/solr-ref-guide/src/velocity-search-ui.adoc index 0cb46975455..cc2fb473009 100644 --- a/solr/solr-ref-guide/src/velocity-search-ui.adoc +++ b/solr/solr-ref-guide/src/velocity-search-ui.adoc @@ -18,11 +18,11 @@ // specific language governing permissions and limitations // under the License. -Solr includes a sample search UI based on the <> (also known as Solritas) that demonstrates several useful features, such as searching, faceting, highlighting, autocomplete, and geospatial searching. 
+Solr includes a sample search UI based on the <> (also known as Solritas) that demonstrates several useful features, such as searching, faceting, highlighting, autocomplete, and geospatial searching. When using the `sample_techproducts_configs` config set, you can access the Velocity sample Search UI: `\http://localhost:8983/solr/techproducts/browse` .The Velocity Search UI image::images/velocity-search-ui/techproducts_browse.png[image,width=500] -For more information about the Velocity Response Writer, see the <>. +For more information about the Velocity Response Writer, see the <>. diff --git a/solr/solr-ref-guide/src/working-with-currencies-and-exchange-rates.adoc b/solr/solr-ref-guide/src/working-with-currencies-and-exchange-rates.adoc index 5ed4a56a7c9..9208775433f 100644 --- a/solr/solr-ref-guide/src/working-with-currencies-and-exchange-rates.adoc +++ b/solr/solr-ref-guide/src/working-with-currencies-and-exchange-rates.adoc @@ -27,7 +27,6 @@ The `currency` FieldType provides support for monetary values to Solr/Lucene wit * Currency parsing by either currency code or symbol * Symmetric & asymmetric exchange rates (asymmetric exchange rates are useful if there are fees associated with exchanging the currency) -[[WorkingwithCurrenciesandExchangeRates-ConfiguringCurrencies]] == Configuring Currencies .CurrencyField has been Deprecated @@ -40,12 +39,12 @@ The `currency` field type is defined in `schema.xml`. This is the default config [source,xml] ---- - ---- -In this example, we have defined the name and class of the field type, and defined the `defaultCurrency` as "USD", for U.S. Dollars. We have also defined a `currencyConfig` to use a file called "currency.xml". This is a file of exchange rates between our default currency to other currencies. There is an alternate implementation that would allow regular downloading of currency data. See <> below for more. +In this example, we have defined the name and class of the field type, and defined the `defaultCurrency` as "USD", for U.S. Dollars. We have also defined a `currencyConfig` to use a file called "currency.xml". This is a file of exchange rates between our default currency to other currencies. There is an alternate implementation that would allow regular downloading of currency data. See <> below for more. Many of the example schemas that ship with Solr include a <> that uses this type, such as this example: @@ -60,10 +59,9 @@ At indexing time, money fields can be indexed in a native currency. For example, During query processing, range and point queries are both supported. -[[WorkingwithCurrenciesandExchangeRates-Sub-fieldSuffixes]] === Sub-field Suffixes -You must specify parameters `amountLongSuffix` and `codeStrSuffix`, corresponding to dynamic fields to be used for the raw amount and the currency dynamic sub-fields, e.g.: +You must specify parameters `amountLongSuffix` and `codeStrSuffix`, corresponding to dynamic fields to be used for the raw amount and the currency dynamic sub-fields, e.g.: [source,xml] ---- @@ -77,15 +75,13 @@ In the above example, the raw amount field will use the `"*_l_ns"` dynamic field .Atomic Updates won't work if dynamic sub-fields are stored [NOTE] ==== -As noted on <>, stored dynamic sub-fields will cause indexing to fail when you use Atomic Updates. To avoid this problem, specify `stored="false"` on those dynamic fields. +As noted on <>, stored dynamic sub-fields will cause indexing to fail when you use Atomic Updates. To avoid this problem, specify `stored="false"` on those dynamic fields. 
==== -[[WorkingwithCurrenciesandExchangeRates-ExchangeRates]] == Exchange Rates You configure exchange rates by specifying a provider. Natively, two provider types are supported: `FileExchangeRateProvider` or `OpenExchangeRatesOrgProvider`. -[[WorkingwithCurrenciesandExchangeRates-FileExchangeRateProvider]] === FileExchangeRateProvider This provider requires you to provide a file of exchange rates. It is the default, meaning that to use this provider you only need to specify the file path and name as a value for `currencyConfig` in the definition for this type. @@ -103,9 +99,9 @@ There is a sample `currency.xml` file included with Solr, found in the same dire - - - + + + @@ -113,7 +109,6 @@ There is a sample `currency.xml` file included with Solr, found in the same dire ---- -[[WorkingwithCurrenciesandExchangeRates-OpenExchangeRatesOrgProvider]] === OpenExchangeRatesOrgProvider You can configure Solr to download exchange rates from http://www.OpenExchangeRates.Org[OpenExchangeRates.Org], with updates rates between USD and 170 currencies hourly. These rates are symmetrical only. @@ -122,10 +117,10 @@ In this case, you need to specify the `providerClass` in the definitions for the [source,xml] ---- - ---- diff --git a/solr/solr-ref-guide/src/working-with-dates.adoc b/solr/solr-ref-guide/src/working-with-dates.adoc index 31d0f1f6da8..5f28f61e23a 100644 --- a/solr/solr-ref-guide/src/working-with-dates.adoc +++ b/solr/solr-ref-guide/src/working-with-dates.adoc @@ -18,7 +18,6 @@ // specific language governing permissions and limitations // under the License. -[[WorkingwithDates-DateFormatting]] == Date Formatting Solr's date fields (`TrieDateField`, `DatePointField` and `DateRangeField`) represent "dates" as a point in time with millisecond precision. The format used is a restricted form of the canonical representation of dateTime in the http://www.w3.org/TR/xmlschema-2/#dateTime[XML Schema specification] – a restricted subset of https://en.wikipedia.org/wiki/ISO_8601[ISO-8601]. For those familiar with Java 8, Solr uses https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#ISO_INSTANT[DateTimeFormatter.ISO_INSTANT] for formatting, and parsing too with "leniency". @@ -48,7 +47,6 @@ There must be a leading `'-'` for dates prior to year 0000, and Solr will format .Query escaping may be required [WARNING] ==== - As you can see, the date format includes colon characters separating the hours, minutes, and seconds. Because the colon is a special character to Solr's most common query parsers, escaping is sometimes required, depending on exactly what you are trying to do. This is normally an invalid query: `datefield:1972-05-20T17:33:18.772Z` @@ -57,10 +55,8 @@ These are valid queries: + `datefield:1972-05-20T17\:33\:18.772Z` + `datefield:"1972-05-20T17:33:18.772Z"` + `datefield:[1972-05-20T17:33:18.772Z TO *]` - ==== -[[WorkingwithDates-DateRangeFormatting]] === Date Range Formatting Solr's `DateRangeField` supports the same point in time date syntax described above (with _date math_ described below) and more to express date ranges. One class of examples is truncated dates, which represent the entire date span to the precision indicated. The other class uses the range syntax (`[ TO ]`). Here are some examples: @@ -74,12 +70,10 @@ Solr's `DateRangeField` supports the same point in time date syntax described ab Limitations: The range syntax doesn't support embedded date math. 
If you specify a date instance supported by TrieDateField with date math truncating it, like `NOW/DAY`, you still get the first millisecond of that day, not the entire day's range. Exclusive ranges (using `{` & `}`) work in _queries_ but not for _indexing_ ranges. -[[WorkingwithDates-DateMath]] == Date Math Solr's date field types also supports _date math_ expressions, which makes it easy to create times relative to fixed moments in time, include the current time which can be represented using the special value of "```NOW```". -[[WorkingwithDates-DateMathSyntax]] === Date Math Syntax Date math expressions consist either adding some quantity of time in a specified unit, or rounding the current time by a specified unit. expressions can be chained and are evaluated left to right. @@ -104,10 +98,8 @@ Note that while date math is most commonly used relative to `NOW` it can be appl `1972-05-20T17:33:18.772Z+6MONTHS+3DAYS/DAY` -[[WorkingwithDates-RequestParametersThatAffectDateMath]] === Request Parameters That Affect Date Math -[[WorkingwithDates-NOW]] ==== NOW The `NOW` parameter is used internally by Solr to ensure consistent date math expression parsing across multiple nodes in a distributed request. But it can be specified to instruct Solr to use an arbitrary moment in time (past or future) to override for all situations where the the special value of "```NOW```" would impact date math expressions. @@ -118,7 +110,6 @@ Example: `q=solr&fq=start_date:[* TO NOW]&NOW=1384387200000` -[[WorkingwithDates-TZ]] ==== TZ By default, all date math expressions are evaluated relative to the UTC TimeZone, but the `TZ` parameter can be specified to override this behaviour, by forcing all date based addition and rounding to be relative to the specified http://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html[time zone]. @@ -161,7 +152,6 @@ http://localhost:8983/solr/my_collection/select?q=*:*&facet.range=my_date_field& ... ---- -[[WorkingwithDates-MoreDateRangeFieldDetails]] == More DateRangeField Details `DateRangeField` is almost a drop-in replacement for places where `TrieDateField` is used. The only difference is that Solr's XML or SolrJ response formats will expose the stored data as a String instead of a Date. The underlying index data for this field will be a bit larger. Queries that align to units of time a second on up should be faster than TrieDateField, especially if it's in UTC. But the main point of DateRangeField as its name suggests is to allow indexing date ranges. To do that, simply supply strings in the format shown above. It also supports specifying 3 different relational predicates between the indexed data, and the query range: `Intersects` (default), `Contains`, `Within`. You can specify the predicate by querying using the `op` local-params parameter like so: diff --git a/solr/solr-ref-guide/src/working-with-enum-fields.adoc b/solr/solr-ref-guide/src/working-with-enum-fields.adoc index 8931543a036..205b73520e5 100644 --- a/solr/solr-ref-guide/src/working-with-enum-fields.adoc +++ b/solr/solr-ref-guide/src/working-with-enum-fields.adoc @@ -20,7 +20,6 @@ The EnumField type allows defining a field whose values are a closed set, and the sort order is pre-determined but is not alphabetic nor numeric. Examples of this are severity lists, or risk definitions. 
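For example, a small SolrJ sketch against a hypothetical `severity` field of an enum type (the field name, enum values, and URL are illustrative assumptions); the point is that range and sort semantics follow the order declared in the enum configuration described in the next section, not alphabetical order:

[source,java]
----
HttpSolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/mycollection").build();

SolrQuery query = new SolrQuery("*:*");
query.addFilterQuery("severity:[Low TO Critical]");  // range over the declared enum order
query.setSort("severity", SolrQuery.ORDER.desc);     // most severe values first

QueryResponse response = solr.query(query);
System.out.println(response.getResults());
solr.close();
----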
-[[WorkingwithEnumFields-DefininganEnumFieldinschema.xml]] == Defining an EnumField in schema.xml The EnumField type definition is quite simple, as in this example defining field types for "priorityLevel" and "riskLevel" enumerations: @@ -33,11 +32,10 @@ The EnumField type definition is quite simple, as in this example defining field Besides the `name` and the `class`, which are common to all field types, this type also takes two additional parameters: -* `enumsConfig`: the name of a configuration file that contains the `` list of field values and their order that you wish to use with this field type. If a path to the file is not defined specified, the file should be in the `conf` directory for the collection. -* `enumName`: the name of the specific enumeration in the `enumsConfig` file to use for this type. +`enumsConfig`:: the name of a configuration file that contains the `` list of field values and their order that you wish to use with this field type. If a path to the file is not specified, the file should be in the `conf` directory for the collection. +`enumName`:: the name of the specific enumeration in the `enumsConfig` file to use for this type. -[[WorkingwithEnumFields-DefiningtheEnumFieldconfigurationfile]] -== Defining the EnumField configuration file +== Defining the EnumField Configuration File The file named with the `enumsConfig` parameter can contain multiple enumeration value lists with different names if there are multiple uses for enumerations in your Solr schema. @@ -68,9 +66,7 @@ In this example, there are two value lists defined. Each list is between `enum` .Changing Values [IMPORTANT] ==== - You cannot change the order, or remove, existing values in an `` without reindexing. You can however add new values to the end. - ==== diff --git a/solr/solr-ref-guide/src/working-with-external-files-and-processes.adoc b/solr/solr-ref-guide/src/working-with-external-files-and-processes.adoc index 3aa0195dc3b..ac42636ca62 100644 --- a/solr/solr-ref-guide/src/working-with-external-files-and-processes.adoc +++ b/solr/solr-ref-guide/src/working-with-external-files-and-processes.adoc @@ -18,7 +18,6 @@ // specific language governing permissions and limitations // under the License. -[[WorkingwithExternalFilesandProcesses-TheExternalFileFieldType]] == The ExternalFileField Type The `ExternalFileField` type makes it possible to specify the values for a field in a file outside the Solr index. For such a field, the file contains mappings from a key field to the field value. Another way to think of this is that, instead of specifying the field in documents as they are indexed, Solr finds values for this field in the external file. @@ -41,7 +40,6 @@ The `keyField` attribute defines the key that will be defined in the external fi The `valType` attribute specifies the actual type of values that will be found in the file. The type specified must be either a float field type, so valid values for this attribute are `pfloat`, `float` or `tfloat`. This attribute can be omitted. -[[WorkingwithExternalFilesandProcesses-FormatoftheExternalFile]] === Format of the External File The file itself is located in Solr's index directory, which by default is `$SOLR_HOME/data`. The name of the file should be `external___fieldname__` or `external___fieldname__.*`. For the example above, then, the file could be named `external_entryRankFile` or `external_entryRankFile.txt`. @@ -62,10 +60,9 @@ doc40=42 The keys listed in this file do not need to be unique.
The file does not need to be sorted, but Solr will be able to perform the lookup faster if it is. -[[WorkingwithExternalFilesandProcesses-ReloadinganExternalFile]] === Reloading an External File -It's possible to define an event listener to reload an external file when either a searcher is reloaded or when a new searcher is started. See the section <> for more information, but a sample definition in `solrconfig.xml` might look like this: +It's possible to define an event listener to reload an external file when either a searcher is reloaded or when a new searcher is started. See the section <> for more information, but a sample definition in `solrconfig.xml` might look like this: [source,xml] ---- @@ -73,15 +70,14 @@ It's possible to define an event listener to reload an external file when either ---- -[[WorkingwithExternalFilesandProcesses-ThePreAnalyzedFieldType]] == The PreAnalyzedField Type The `PreAnalyzedField` type provides a way to send to Solr serialized token streams, optionally with independent stored values of a field, and have this information stored and indexed without any additional text processing applied in Solr. This is useful if user wants to submit field content that was already processed by some existing external text processing pipeline (e.g., it has been tokenized, annotated, stemmed, synonyms inserted, etc.), while using all the rich attributes that Lucene's TokenStream provides (per-token attributes). The serialization format is pluggable using implementations of PreAnalyzedParser interface. There are two out-of-the-box implementations: -* <>: as the name suggests, it parses content that uses JSON to represent field's content. This is the default parser to use if the field type is not configured otherwise. -* <>: uses a simple strict plain text format, which in some situations may be easier to create than JSON. +* <>: as the name suggests, it parses content that uses JSON to represent field's content. This is the default parser to use if the field type is not configured otherwise. +* <>: uses a simple strict plain text format, which in some situations may be easier to create than JSON. There is only one configuration parameter, `parserImpl`. The value of this parameter should be a fully qualified class name of a class that implements PreAnalyzedParser interface. The default value of this parameter is `org.apache.solr.schema.JsonPreAnalyzedParser`. @@ -97,7 +93,6 @@ By default, the query-time analyzer for fields of this type will be the same as ---- -[[WorkingwithExternalFilesandProcesses-JsonPreAnalyzedParser]] === JsonPreAnalyzedParser This is the default serialization format used by PreAnalyzedField type. It uses a top-level JSON map with the following keys: @@ -115,8 +110,7 @@ This is the default serialization format used by PreAnalyzedField type. It uses Any other top-level key is silently ignored. -[[WorkingwithExternalFilesandProcesses-Tokenstreamserialization]] -==== Token stream serialization +==== Token Stream Serialization The token stream is expressed as a JSON list of JSON maps. The map for each token consists of the following keys and values: @@ -136,8 +130,7 @@ The token stream is expressed as a JSON list of JSON maps. The map for each toke Any other key is silently ignored. -[[WorkingwithExternalFilesandProcesses-Example]] -==== Example +==== JsonPreAnalyzedParser Example [source,json] ---- @@ -152,13 +145,11 @@ Any other key is silently ignored. 
} ---- -[[WorkingwithExternalFilesandProcesses-SimplePreAnalyzedParser]] === SimplePreAnalyzedParser The fully qualified class name to use when specifying this format via the `parserImpl` configuration parameter is `org.apache.solr.schema.SimplePreAnalyzedParser`. -[[WorkingwithExternalFilesandProcesses-Syntax]] -==== Syntax +==== SimplePreAnalyzedParser Syntax The serialization format supported by this parser is as follows: @@ -192,8 +183,7 @@ Special characters in "text" values can be escaped using the escape character `\ Please note that Unicode sequences (e.g. `\u0001`) are not supported. -[[WorkingwithExternalFilesandProcesses-Supportedattributenames]] -==== Supported attribute names +==== Supported Attributes The following token attributes are supported, and identified with short symbolic names: @@ -212,8 +202,7 @@ The following token attributes are supported, and identified with short symbolic Token positions are tracked and implicitly added to the token stream - the start and end offsets consider only the term text and whitespace, and exclude the space taken by token attributes. -[[WorkingwithExternalFilesandProcesses-Exampletokenstreams]] -==== Example token streams +==== Example Token Streams // TODO: in cwiki each of these examples was in it's own "panel" ... do we want something like that here? // TODO: these examples match what was in cwiki, but I'm honestly not sure if the formatting there was correct to start? diff --git a/solr/solr-ref-guide/src/zookeeper-access-control.adoc b/solr/solr-ref-guide/src/zookeeper-access-control.adoc index 919ccb33ca0..78944f3fb73 100644 --- a/solr/solr-ref-guide/src/zookeeper-access-control.adoc +++ b/solr/solr-ref-guide/src/zookeeper-access-control.adoc @@ -20,7 +20,6 @@ This section describes using ZooKeeper access control lists (ACLs) with Solr. For information about ZooKeeper ACLs, see the ZooKeeper documentation at http://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#sc_ZooKeeperAccessControl. -[[ZooKeeperAccessControl-AboutZooKeeperACLs]] == About ZooKeeper ACLs SolrCloud uses ZooKeeper for shared information and for coordination. @@ -44,7 +43,6 @@ Protecting ZooKeeper itself could mean many different things. **This section is But this content is also available to "the outside" via the ZooKeeper API. Outside processes can connect to ZooKeeper and create/update/delete/read content; for example, a Solr node in a SolrCloud cluster wants to create/update/delete/read, and a SolrJ client wants to read from the cluster. It is the responsibility of the outside processes that create/update content to setup ACLs on the content. ACLs describe who is allowed to read, update, delete, create, etc. Each piece of information (znode/content) in ZooKeeper has its own set of ACLs, and inheritance or sharing is not possible. The default behavior in Solr is to add one ACL on all the content it creates - one ACL that gives anyone the permission to do anything (in ZooKeeper terms this is called "the open-unsafe ACL"). -[[ZooKeeperAccessControl-HowtoEnableACLs]] == How to Enable ACLs We want to be able to: @@ -55,7 +53,6 @@ We want to be able to: Solr nodes, clients and tools (e.g. ZkCLI) always use a java class called {solr-javadocs}/solr-solrj/org/apache/solr/common/cloud/SolrZkClient.html[`SolrZkClient`] to deal with their ZooKeeper stuff. The implementation of the solution described here is all about changing `SolrZkClient`. If you use `SolrZkClient` in your application, the descriptions below will be true for your application too. 
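As a hedged sketch, a SolrJ client (or any other process embedding `SolrZkClient`) can point these hooks at the VM-parameter based providers described in the following subsections by setting system properties before the first ZooKeeper connection is made. The class names below are the Solr-shipped implementations; the ZooKeeper address and passwords are placeholders:

[source,java]
----
// Must be set before any SolrZkClient (e.g. inside CloudSolrClient) is created.
System.setProperty("zkCredentialsProvider",
    "org.apache.solr.common.cloud.VMParamsSingleSetCredentialsDigestZkCredentialsProvider");
System.setProperty("zkACLProvider",
    "org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider");
System.setProperty("zkDigestUsername", "admin-user");
System.setProperty("zkDigestPassword", "CHANGEME-ADMIN-PASSWORD");
System.setProperty("zkDigestReadonlyUsername", "readonly-user");
System.setProperty("zkDigestReadonlyPassword", "CHANGEME-READONLY-PASSWORD");

CloudSolrClient client = new CloudSolrClient.Builder()
    .withZkHost("zk1:2181,zk2:2181,zk3:2181/solr")
    .build();
----

In practice these are more often passed as `-D` JVM arguments (as the `SOLR_ZK_CREDS_AND_ACLS` examples later in this section do) than set programmatically.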
-[[ZooKeeperAccessControl-ControllingCredentials]] === Controlling Credentials You control which credentials provider will be used by configuring the `zkCredentialsProvider` property in `solr.xml` 's `` section to the name of a class (on the classpath) implementing the {solr-javadocs}/solr-solrj/org/apache/solr/common/cloud/ZkCredentialsProvider[`ZkCredentialsProvider`] interface. `server/solr/solr.xml` in the Solr distribution defines the `zkCredentialsProvider` such that it will take on the value of the same-named `zkCredentialsProvider` system property if it is defined (e.g. by uncommenting the `SOLR_ZK_CREDS_AND_ACLS` environment variable definition in `solr.in.sh/.cmd` - see below), or if not, default to the `DefaultZkCredentialsProvider` implementation. @@ -69,12 +66,10 @@ You can always make you own implementation, but Solr comes with two implementati ** The schema is "digest". The username and password are defined by system properties `zkDigestUsername` and `zkDigestPassword`. This set of credentials will be added to the list of credentials returned by `getCredentials()` if both username and password are provided. ** If the one set of credentials above is not added to the list, this implementation will fall back to default behavior and use the (empty) credentials list from `DefaultZkCredentialsProvider`. -[[ZooKeeperAccessControl-ControllingACLs]] === Controlling ACLs You control which ACLs will be added by configuring `zkACLProvider` property in `solr.xml` 's `` section to the name of a class (on the classpath) implementing the {solr-javadocs}//solr-solrj/org/apache/solr/common/cloud/ZkACLProvider[`ZkACLProvider`] interface. `server/solr/solr.xml` in the Solr distribution defines the `zkACLProvider` such that it will take on the value of the same-named `zkACLProvider` system property if it is defined (e.g. by uncommenting the `SOLR_ZK_CREDS_AND_ACLS` environment variable definition in `solr.in.sh/.cmd` - see below), or if not, default to the `DefaultZkACLProvider` implementation. -[[ZooKeeperAccessControl-OutoftheBoxImplementations]] ==== Out of the Box ACL Implementations You can always make you own implementation, but Solr comes with: @@ -97,8 +92,6 @@ Notice the overlap in system property names with credentials provider `VMParamsS You can give the readonly credentials to "clients" of your SolrCloud cluster - e.g. to be used by SolrJ clients. They will be able to read whatever is necessary to run a functioning SolrJ client, but they will not be able to modify any content in ZooKeeper. - -[[ZooKeeperAccessControl-bin_solr_solr.cmd_server_scripts_cloud-scripts_zkcli.sh_zkcli.bat]] === ZooKeeper ACLs in Solr Scripts There are two scripts that impact ZooKeeper ACLs: @@ -150,7 +143,6 @@ REM -DzkDigestUsername=admin-user -DzkDigestPassword=CHANGEME-ADMIN-PASSWORD ^ REM -DzkDigestReadonlyUsername=readonly-user -DzkDigestReadonlyPassword=CHANGEME-READONLY-PASSWORD ---- -[[ZooKeeperAccessControl-ChangingACLSchemes]] == Changing ACL Schemes Over the lifetime of operating your Solr cluster, you may decide to move from an unsecured ZooKeeper to a secured instance. Changing the configured `zkACLProvider` in `solr.xml` will ensure that newly created nodes are secure, but will not protect the already existing data. To modify all existing ACLs, you can use the `updateacls` command with Solr's ZkCLI. 
First uncomment the `SOLR_ZK_CREDS_AND_ACLS` environment variable definition in `server/scripts/cloud-scripts/zkcli.sh` (or `zkcli.bat` on Windows) and fill in the passwords for the admin-user and the readonly-user - see above - then run `server/scripts/cloud-scripts/zkcli.sh -cmd updateacls /zk-path`, or on Windows run `server\scripts\cloud-scripts\zkcli.bat cmd updateacls /zk-path`. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ResidualsEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ResidualsEvaluator.java new file mode 100644 index 00000000000..9a9c8699124 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ResidualsEvaluator.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Locale; +import java.util.List; + +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class ResidualsEvaluator extends ComplexEvaluator implements Expressible { + + private static final long serialVersionUID = 1; + + public ResidualsEvaluator(StreamExpression expression, + StreamFactory factory) throws IOException { + super(expression, factory); + + if(3 != subEvaluators.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting three values (regression result and two numeric arrays) but found %d",expression,subEvaluators.size())); + } + } + + public List evaluate(Tuple tuple) throws IOException { + + StreamEvaluator r = subEvaluators.get(0); + StreamEvaluator a = subEvaluators.get(1); + StreamEvaluator b = subEvaluators.get(2); + + RegressionEvaluator.RegressionTuple rt= (RegressionEvaluator.RegressionTuple)r.evaluate(tuple); + List listA = (List)a.evaluate(tuple); + List listB = (List)b.evaluate(tuple); + List residuals = new ArrayList(); + + for(int i=0; i protected String routeKey; protected String instanceDir; protected String dataDir; + protected String ulogDir; protected Properties properties; protected Replica.Type type; @@ -1408,6 +1409,10 @@ public abstract class CollectionAdminRequest return instanceDir; } + public String getUlogDir() { + return ulogDir; + } + public AddReplica setInstanceDir(String instanceDir) { this.instanceDir = instanceDir; return this; @@ -1432,6 +1437,11 @@ 
public abstract class CollectionAdminRequest return this; } + public AddReplica setUlogDir(String ulogDir) { + this.ulogDir = ulogDir; + return this; + } + @Override public SolrParams getParams() { ModifiableSolrParams params = new ModifiableSolrParams(super.getParams()); @@ -1452,6 +1462,9 @@ public abstract class CollectionAdminRequest if (dataDir != null) { params.add("dataDir", dataDir); } + if (ulogDir != null) { + params.add("ulogDir", ulogDir); + } if (coreName != null) { params.add("name", coreName); } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/CollectionAdminResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/CollectionAdminResponse.java index 6821075b366..c50ef37bfce 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/CollectionAdminResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/CollectionAdminResponse.java @@ -35,6 +35,11 @@ public class CollectionAdminResponse extends SolrResponseBase return getResponse().get( "success" ) != null; } + public String getWarning() + { + return (String) getResponse().get( "warning" ); + } + // this messages are typically from individual nodes, since // all the failures at the router are propagated as exceptions @SuppressWarnings("unchecked") diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java index 863372e3bf4..42acd36de94 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java @@ -22,9 +22,6 @@ import java.util.Set; import org.noggit.JSONUtil; -import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP; -import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; - public class Replica extends ZkNodeProps { /** @@ -132,14 +129,14 @@ public class Replica extends ZkNodeProps { } public String getCoreUrl() { - return ZkCoreNodeProps.getCoreUrl(getStr(BASE_URL_PROP), getStr(CORE_NAME_PROP)); + return ZkCoreNodeProps.getCoreUrl(getStr(ZkStateReader.BASE_URL_PROP), getStr(ZkStateReader.CORE_NAME_PROP)); } public String getBaseUrl(){ return getStr(ZkStateReader.BASE_URL_PROP); } public String getCoreName() { - return getStr(CORE_NAME_PROP); + return getStr(ZkStateReader.CORE_NAME_PROP); } /** The name of the node this replica resides on */ diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index 8524f4df67f..ff105f6a17b 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -6021,6 +6021,30 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertTrue(out.get(8).intValue() == 9); } + + @Test + public void testResiduals() throws Exception { + String cexpr = "let(a=array(1,2,3,4,5,6), b=array(2,4,6,8,10,12), c=regress(a,b), tuple(res=residuals(c,a,a)))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + 
assertTrue(tuples.size() == 1); + List out = (List)tuples.get(0).get("res"); + assertTrue(out.size() == 6); + assertTrue(out.get(0).intValue() == -1); + assertTrue(out.get(1).intValue() == -2); + assertTrue(out.get(2).intValue() == -3); + assertTrue(out.get(3).intValue() == -4); + assertTrue(out.get(4).intValue() == -5); + assertTrue(out.get(5).intValue() == -6); + } + + @Test public void testAnova() throws Exception { String cexpr = "anova(array(1,2,3,5,4,6), array(5,2,3,5,4,6), array(1,2,7,5,4,6))"; diff --git a/solr/webapp/web/js/angular/controllers/cloud.js b/solr/webapp/web/js/angular/controllers/cloud.js index 3636d8059b2..80e7d03da4a 100644 --- a/solr/webapp/web/js/angular/controllers/cloud.js +++ b/solr/webapp/web/js/angular/controllers/cloud.js @@ -249,6 +249,7 @@ var graphSubController = function ($scope, Zookeeper, isRadial) { }; $scope.initGraph(); + $scope.pos = 0; }; solrAdminApp.directive('graph', function(Constants) {