From 73d74107dcb2d836c541654e4bf99dc2e306cf75 Mon Sep 17 00:00:00 2001 From: Chris Hostetter Date: Thu, 5 Apr 2018 17:16:00 -0700 Subject: [PATCH 1/7] Use asciidoctor's 'attribute-missing: warn' option and fix some places that were generating warnings because of unescaped/unintended attribute syntax --- solr/solr-ref-guide/build.xml | 1 + solr/solr-ref-guide/src/_config.yml.template | 1 + solr/solr-ref-guide/src/learning-to-rank.adoc | 2 +- solr/solr-ref-guide/src/meta-docs/pdf.adoc | 2 +- solr/solr-ref-guide/src/rule-based-replica-placement.adoc | 2 +- ...structured-data-store-data-with-the-data-import-handler.adoc | 2 +- 6 files changed, 6 insertions(+), 4 deletions(-) diff --git a/solr/solr-ref-guide/build.xml b/solr/solr-ref-guide/build.xml index 92c236ac470..08fbc40de3e 100644 --- a/solr/solr-ref-guide/build.xml +++ b/solr/solr-ref-guide/build.xml @@ -213,6 +213,7 @@ imagesDir="${build.content.dir}" doctype="book" safemode="unsafe"> + diff --git a/solr/solr-ref-guide/src/_config.yml.template b/solr/solr-ref-guide/src/_config.yml.template index 1c55dc47a18..f50ae1ec6ed 100755 --- a/solr/solr-ref-guide/src/_config.yml.template +++ b/solr/solr-ref-guide/src/_config.yml.template @@ -91,6 +91,7 @@ asciidoctor: safe: 0 attributes: <<: *solr-attributes-ref + attribute-missing: "warn" icons: "font" source-highlighter: "pygments" pygments-css: "style" diff --git a/solr/solr-ref-guide/src/learning-to-rank.adoc b/solr/solr-ref-guide/src/learning-to-rank.adoc index f98f049e700..4e79a7ae267 100644 --- a/solr/solr-ref-guide/src/learning-to-rank.adoc +++ b/solr/solr-ref-guide/src/learning-to-rank.adoc @@ -61,7 +61,7 @@ The LTR contrib module includes several feature classes as well as support for c |solr query |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/SolrFeature.html[SolrFeature] |`{"q":"{!func}` `recip(ms(NOW,last_modified)` `,3.16e-11,1,1)"}` |supported |solr filter query |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/SolrFeature.html[SolrFeature] 
|`{"fq":["{!terms f=category}book"]}` |supported |solr query + filter query |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/SolrFeature.html[SolrFeature] |`{"q":"{!func}` `recip(ms(NOW,last_modified),` `3.16e-11,1,1)",` `"fq":["{!terms f=category}book"]}` |supported -|value |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/ValueFeature.html[ValueFeature] |`{"value":"${userFromMobile}","required":true}` |supported +|value |{solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/ValueFeature.html[ValueFeature] |`{"value":"$\{userFromMobile}","required":true}` |supported |(custom) |(custom class extending {solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/Feature.html[Feature]) | | |=== diff --git a/solr/solr-ref-guide/src/meta-docs/pdf.adoc b/solr/solr-ref-guide/src/meta-docs/pdf.adoc index 9c5cc7fbd8c..b7d4c626888 100644 --- a/solr/solr-ref-guide/src/meta-docs/pdf.adoc +++ b/solr/solr-ref-guide/src/meta-docs/pdf.adoc @@ -141,5 +141,5 @@ These attributes use variables that are inserted by Ant during the PDF creation ``:: The version of Solr covered by this guide. ``:: Sets the path for Solr javadoc links to include the right path for the current release version. ``:: Sets the path for Lucene javadoc links to the right path for the current release version. -``:: Sets the date of the build to add the date to the footer of each page of the PDF. +``:: Sets the date of the build to add the date to the footer of each page of the PDF. ``:: Sets the year of the build to add the date to the copyright notice. diff --git a/solr/solr-ref-guide/src/rule-based-replica-placement.adoc b/solr/solr-ref-guide/src/rule-based-replica-placement.adoc index 7c0953b24ae..66b1d459e5a 100644 --- a/solr/solr-ref-guide/src/rule-based-replica-placement.adoc +++ b/solr/solr-ref-guide/src/rule-based-replica-placement.adoc @@ -84,7 +84,7 @@ Tag values come from a plugin called Snitch. If there is a tag named ‘rack’ * *node*: node name * *role*: The role of the node. 
The only supported role is 'overseer' * *ip_1, ip_2, ip_3, ip_4*: These are ip fragments for each node. For example, in a host with ip `192.168.1.2`, `ip_1 = 2`, `ip_2 =1`, `ip_3 = 168` and` ip_4 = 192` -* *sysprop.{PROPERTY_NAME}*: These are values available from system properties. `sysprop.key` means a value that is passed to the node as `-Dkey=keyValue` during the node startup. It is possible to use rules like `sysprop.key:expectedVal,shard:*` +* *sysprop.\{PROPERTY_NAME}*: These are values available from system properties. `sysprop.key` means a value that is passed to the node as `-Dkey=keyValue` during the node startup. It is possible to use rules like `sysprop.key:expectedVal,shard:*` === How Snitches are Configured diff --git a/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc b/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc index fdcfe5ab7e1..7cf50ee2fb7 100644 --- a/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc +++ b/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc @@ -306,7 +306,7 @@ For MySQL driver, which doesn't honor fetchSize and pulls whole resultSet, which + In this case, set `batchSize=-1` that pass setFetchSize(Integer.MIN_VALUE), and switch result set to pull row by row -All of them substitute properties via `${placeholders}`. +All of them substitute properties via `$\{placeholders}`. 
=== URLDataSource From b2d756c9f4d8e272f822682eba48d64055368c90 Mon Sep 17 00:00:00 2001 From: Cassandra Targett Date: Thu, 5 Apr 2018 13:42:44 -0500 Subject: [PATCH 2/7] Ref Guide: fix color definition so monospace links display with color --- solr/solr-ref-guide/src/css/customstyles.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/src/css/customstyles.css b/solr/solr-ref-guide/src/css/customstyles.css index 8ead55c7b9b..9a166c17a78 100755 --- a/solr/solr-ref-guide/src/css/customstyles.css +++ b/solr/solr-ref-guide/src/css/customstyles.css @@ -762,7 +762,7 @@ span.label.label-primary { .col-lg-12 .nav li a {background-color: white} a code { - color: ##2156a5; + color: #2156a5; } table th code { From abaf378d0e9e2e4af705d964edc2aaf74103cb95 Mon Sep 17 00:00:00 2001 From: Cassandra Targett Date: Fri, 6 Apr 2018 10:59:42 -0500 Subject: [PATCH 3/7] Ref Guide: add language to source blocks; split optimistic concurrency example & add explanations --- .../src/updating-parts-of-documents.adoc | 45 ++++++++++++++++--- .../src/upgrading-a-solr-cluster.adoc | 2 +- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc index 5e25d51f775..949b60b8bb2 100644 --- a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc +++ b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc @@ -58,7 +58,7 @@ The core functionality of atomically updating a document requires that all field If `` destinations are configured as stored, then Solr will attempt to index both the current value of the field as well as an additional copy from any source fields. If such fields contain some information that comes from the indexing program and some information that comes from copyField, then the information which originally came from the indexing program will be lost when an atomic update is made. 
-There are other kinds of derived fields that must also be set so they aren't stored. Some spatial field types use derived fields. Examples of this are solr.BBoxField and solr.LatLonType. CurrencyFieldType also uses derived fields. These types create additional fields which are normally specified by a dynamic field definition. That dynamic field definition must be not stored, or indexing will fail. +There are other kinds of derived fields that must also be set so they aren't stored. Some spatial field types, such as BBoxField and LatLonType, use derived fields. CurrencyFieldType also uses derived fields. These types create additional fields which are normally specified by a dynamic field definition. That dynamic field definition must be not stored, or indexing will fail. === Example Updating Part of a Document @@ -188,30 +188,61 @@ When the client resubmits a changed document to Solr, the `\_version_` can be in If the document being updated does not include the `\_version_` field, and atomic updates are not being used, the document will be treated by normal Solr rules, which is usually to discard the previous version. -When using Optimistic Concurrency, clients can include an optional `versions=true` request parameter to indicate that the _new_ versions of the documents being added should be included in the response. This allows clients to immediately know what the `\_version_` is of every documented added without needing to make a redundant <>. +When using Optimistic Concurrency, clients can include an optional `versions=true` request parameter to indicate that the _new_ versions of the documents being added should be included in the response. This allows clients to immediately know what the `\_version_` is of every document added without needing to make a redundant <>. 
-For example: +Following are some examples using `versions=true` in queries: -[source] +[source,bash] ---- $ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?versions=true' --data-binary ' [ { "id" : "aaa" }, { "id" : "bbb" } ]' +---- +[source,json] +---- {"responseHeader":{"status":0,"QTime":6}, "adds":["aaa",1498562471222312960, "bbb",1498562471225458688]} +---- + +In this example, we have added 2 documents "aaa" and "bbb". Because we added `versions=true` to the request, the response shows the document version for each document. + +[source,bash] +---- $ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?_version_=999999&versions=true' --data-binary ' [{ "id" : "aaa", "foo_s" : "update attempt with wrong existing version" }]' +---- +[source,json] +---- {"responseHeader":{"status":409,"QTime":3}, "error":{"msg":"version conflict for aaa expected=999999 actual=1498562471222312960", "code":409}} +---- + + +In this example, we've attempted to update document "aaa" but specified the wrong version in the request: `_version_=999999` doesn't match the document version we just got when we added the document. We get an error in response. + +[source,bash] +---- $ curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/techproducts/update?_version_=1498562471222312960&versions=true&commit=true' --data-binary ' [{ "id" : "aaa", "foo_s" : "update attempt with correct existing version" }]' +---- +[source,json] +---- {"responseHeader":{"status":0,"QTime":5}, "adds":["aaa",1498562624496861184]} +---- + +Now we've sent an update with a value for `\_version_` that matches the value in the index, and it succeeds. Because we included `versions=true` to the update request, the response includes a different value for the `\_version_` field. 
+ +[source,bash] +---- $ curl 'http://localhost:8983/solr/techproducts/query?q=*:*&fl=id,_version_' +---- +[source,json] +---- { "responseHeader":{ "status":0, @@ -229,11 +260,13 @@ $ curl 'http://localhost:8983/solr/techproducts/query?q=*:*&fl=id,_version_' }} ---- -For more information, please also see https://www.youtube.com/watch?v=WYVM6Wz-XTw[Yonik Seeley's presentation on NoSQL features in Solr 4] from Apache Lucene EuroCon 2012. +Finally, we can issue a query that requests the `\_version_` field be included in the response, and we can see that for the two documents in our example index. + +For more information, please also see Yonik Seeley's presentation on https://www.youtube.com/watch?v=WYVM6Wz-XTw[NoSQL features in Solr 4] from Apache Lucene EuroCon 2012. == Document Centric Versioning Constraints -Optimistic Concurrency is extremely powerful, and works very efficiently because it uses an internally assigned, globally unique values for the `\_version_` field. However, In some situations users may want to configure their own document specific version field, where the version values are assigned on a per-document basis by an external system, and have Solr reject updates that attempt to replace a document with an "older" version. In situations like this the {solr-javadocs}/solr-core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[`DocBasedVersionConstraintsProcessorFactory`] can be useful. +Optimistic Concurrency is extremely powerful, and works very efficiently because it uses an internally assigned, globally unique values for the `\_version_` field. However, in some situations users may want to configure their own document specific version field, where the version values are assigned on a per-document basis by an external system, and have Solr reject updates that attempt to replace a document with an "older" version. 
In situations like this the {solr-javadocs}/solr-core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[`DocBasedVersionConstraintsProcessorFactory`] can be useful. The basic usage of `DocBasedVersionConstraintsProcessorFactory` is to configure it in `solrconfig.xml` as part of the <> and specify the name of your custom `versionField` in your schema that should be checked when validating updates: diff --git a/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc b/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc index 01855f1ee7e..6abe138e4bf 100644 --- a/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc +++ b/solr/solr-ref-guide/src/upgrading-a-solr-cluster.adoc @@ -65,7 +65,7 @@ If you have a `/var/solr/solr.in.sh` file for your existing Solr install, runnin Open `/etc/default/solr.in.sh` with a text editor and verify that the following variables are set correctly, or add them bottom of the include file as needed: -[source] +[source,properties] ZK_HOST= SOLR_HOST= SOLR_PORT= From 5c37b07a3d53e64c2f0cebd33eb7024d693d62f5 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Fri, 6 Apr 2018 14:50:21 -0400 Subject: [PATCH 4/7] SOLR-12199: TestReplicationHandler.doTestRepeater(): TEST_PORT interpolation failure: Server refused connection at: http://127.0.0.1:TEST_PORT/solr --- solr/CHANGES.txt | 3 +++ .../org/apache/solr/handler/TestReplicationHandler.java | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 09c330bda33..c7270da5c64 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -111,6 +111,9 @@ Bug Fixes * SOLR-11929: UpdateLog metrics are not initialized on core reload. 
(ab, Steve Rowe) +* SOLR-12199: TestReplicationHandler.doTestRepeater(): TEST_PORT interpolation failure: + Server refused connection at: http://127.0.0.1:TEST_PORT/solr (Mikhail Khludnev, Dawid Weiss, Steve Rowe) + Optimizations ---------------------- diff --git a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java index e8caf99a528..e4b7fa32a72 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java +++ b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java @@ -294,6 +294,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { public void doTestDetails() throws Exception { slaveJetty.stop(); + slave.setTestPort(masterJetty.getLocalPort()); slave.copyConfigFile(CONF_DIR + "solrconfig-slave.xml", "solrconfig.xml"); slaveJetty = createJetty(slave); @@ -706,6 +707,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { public void doTestIndexFetchWithMasterUrl() throws Exception { //change solrconfig on slave //this has no entry for pollinginterval + slave.setTestPort(masterJetty.getLocalPort()); slave.copyConfigFile(CONF_DIR + "solrconfig-slave1.xml", "solrconfig.xml"); slaveJetty.stop(); slaveJetty = createJetty(slave); @@ -840,7 +842,8 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { String slaveSchema = SLAVE_SCHEMA_1; try { - + + slave.setTestPort(masterJetty.getLocalPort()); slave.copyConfigFile(CONF_DIR +"solrconfig-slave1.xml", "solrconfig.xml"); slave.copyConfigFile(CONF_DIR +slaveSchema, "schema.xml"); slaveJetty.stop(); @@ -986,6 +989,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { @Test public void doTestRepeater() throws Exception { // no polling + slave.setTestPort(masterJetty.getLocalPort()); slave.copyConfigFile(CONF_DIR + "solrconfig-slave1.xml", "solrconfig.xml"); slaveJetty.stop(); slaveJetty = createJetty(slave); @@ -993,7 +997,7 @@ public class 
TestReplicationHandler extends SolrTestCaseJ4 { slaveClient = createNewSolrClient(slaveJetty.getLocalPort()); try { - repeater = new SolrInstance(createTempDir("solr-instance").toFile(), "repeater", null); + repeater = new SolrInstance(createTempDir("solr-instance").toFile(), "repeater", masterJetty.getLocalPort()); repeater.setUp(); repeater.copyConfigFile(CONF_DIR + "solrconfig-repeater.xml", "solrconfig.xml"); From 0f53adbee49015aa01e8f66945f82e88a9172c7c Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 6 Apr 2018 15:20:22 -0400 Subject: [PATCH 5/7] LUCENE-8238: improve javadocs for WordDelimiterFilter and WordDelimiterGraphFilter --- lucene/CHANGES.txt | 5 +++++ .../miscellaneous/WordDelimiterFilter.java | 18 ++++++++++++------ .../WordDelimiterGraphFilter.java | 17 +++++++++++------ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 84e242d5e45..f90f9e3f8fd 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -153,6 +153,11 @@ Build * LUCENE-8230: Upgrade forbiddenapis to version 2.5. (Uwe Schindler) +Documentation + +* LUCENE-8238: Improve WordDelimiterFilter and WordDelimiterGraphFilter javadocs + (Mike Sokolov via Mike McCandless) + ======================= Lucene 7.3.0 ======================= API Changes diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java index aef697ce4ff..313386bb523 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java @@ -55,11 +55,14 @@ import org.apache.lucene.util.InPlaceMergeSorter; * * * - * The combinations parameter affects how subwords are combined: + * The GENERATE... 
options affect how incoming tokens are broken into parts, and the + * various CATENATE_... parameters affect how those parts are combined. + * *
    - *
  • combinations="0" causes no subword combinations: "PowerShot" - * → 0:"Power", 1:"Shot" (0 and 1 are the token positions)
  • - *
  • combinations="1" means that in addition to the subwords, maximum runs of + *
  • If no CATENATE option is set, then no subword combinations are generated: + * "PowerShot"0:"Power", 1:"Shot" (0 and 1 are the token + * positions)
  • + *
  • CATENATE_WORDS means that in addition to the subwords, maximum runs of * non-numeric subwords are catenated and produced at the same position of the * last subword in the run: *
      @@ -72,12 +75,15 @@ import org.apache.lucene.util.InPlaceMergeSorter; * *
    *
  • + *
  • CATENATE_NUMBERS works like CATENATE_WORDS, but for adjacent digit sequences.
  • + *
  • CATENATE_ALL smushes together all the token parts without distinguishing numbers and words.
  • *
+ * * One use for {@link WordDelimiterFilter} is to help match words with different * subword delimiters. For example, if the source text contained "wi-fi" one may * want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so - * is to specify combinations="1" in the analyzer used for indexing, and - * combinations="0" (the default) in the analyzer used for querying. Given that + * is to specify CATENATE options in the analyzer used for indexing, and + * not in the analyzer used for querying. Given that * the current {@link StandardTokenizer} immediately removes many intra-word * delimiters, it is recommended that this filter be used after a tokenizer that * does not do this (such as {@link WhitespaceTokenizer}). diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java index a6ade199545..7949fa2b182 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java @@ -62,11 +62,14 @@ import org.apache.lucene.util.RamUsageEstimator; * * * - * The combinations parameter affects how subwords are combined: + * The GENERATE... options affect how incoming tokens are broken into parts, and the + * various CATENATE_... parameters affect how those parts are combined. + * *
    - *
  • combinations="0" causes no subword combinations: "PowerShot" - * → 0:"Power", 1:"Shot" (0 and 1 are the token positions)
  • - *
  • combinations="1" means that in addition to the subwords, maximum runs of + *
  • If no CATENATE option is set, then no subword combinations are generated: + * "PowerShot"0:"Power", 1:"Shot" (0 and 1 are the token + * positions)
  • + *
  • CATENATE_WORDS means that in addition to the subwords, maximum runs of * non-numeric subwords are catenated and produced at the same position of the * last subword in the run: *
      @@ -79,12 +82,14 @@ import org.apache.lucene.util.RamUsageEstimator; * *
    *
  • + *
  • CATENATE_NUMBERS works like CATENATE_WORDS, but for adjacent digit sequences.
  • + *
  • CATENATE_ALL smushes together all the token parts without distinguishing numbers and words.
  • *
* One use for {@link WordDelimiterGraphFilter} is to help match words with different * subword delimiters. For example, if the source text contained "wi-fi" one may * want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so - * is to specify combinations="1" in the analyzer used for indexing, and - * combinations="0" (the default) in the analyzer used for querying. Given that + * is to specify CATENATE options in the analyzer used for indexing, and not + * in the analyzer used for querying. Given that * the current {@link StandardTokenizer} immediately removes many intra-word * delimiters, it is recommended that this filter be used after a tokenizer that * does not do this (such as {@link WhitespaceTokenizer}). From 005da875211bc271257c1fb008a8355a3c1e9f3c Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sat, 7 Apr 2018 18:29:14 +0100 Subject: [PATCH 6/7] LUCENE-8226: Don't generate unnecessarily massive indexes for index vs query sorting test --- .../src/test/org/apache/lucene/index/TestIndexSorting.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 6b43c162ec7..3679d20bf6a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -2222,12 +2222,7 @@ public class TestIndexSorting extends LuceneTestCase { // pits index time sorting against query time sorting public void testRandom3() throws Exception { - int numDocs; - if (TEST_NIGHTLY) { - numDocs = atLeast(100000); - } else { - numDocs = atLeast(1000); - } + int numDocs = atLeast(1000); List docs = new ArrayList<>(); Sort sort = randomSort(); From 2ae488aae2a0601148dcd1b6aa794489a0572349 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sat, 7 Apr 2018 18:34:22 +0100 Subject: [PATCH 7/7] SOLR-12147: Don't use MemoryPostingsFormat 
for TestDocTermOrds.testTriggerUnInvertLimit This can lead to OOM on nightly runs, as it needs to create a very large index, and the CI machines don't have huge amounts of RAM. --- .../org/apache/solr/uninverting/TestDocTermOrds.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java index c2d9e174164..21f44ea1534 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Set; import org.apache.lucene.analysis.Analyzer; @@ -42,17 +43,17 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum.SeekStatus; -import org.apache.solr.legacy.LegacyIntField; -import org.apache.solr.legacy.LegacyLongField; -import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; import org.apache.solr.index.SlowCompositeReaderWrapper; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyNumericUtils; // TODO: // - test w/ del docs @@ -145,6 +146,9 @@ public class TestDocTermOrds extends LuceneTestCase { public void testTriggerUnInvertLimit() throws IOException { final boolean SHOULD_TRIGGER = false; // Set this to true to use the test 
with the old implementation + assumeFalse("Don't run this massive test with MemoryPostingsFormat, as it can OOM", + Objects.equals(Codec.getDefault().postingsFormat().getName(), "Memory")); + // Ensure enough terms inside of a single UnInvert-pass-structure to trigger the limit final int REF_LIMIT = (int) Math.pow(2, 24); // Maximum number of references within a single pass-structure final int DOCS = (1<<16)-1; // The number of documents within a single pass (simplified)