From 04108d993574537d9623d5d1bf2658cafad12ef8 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Tue, 20 Dec 2016 12:05:33 -0500 Subject: [PATCH 01/83] SOLR-9847: Stop blocking further schema updates while waiting for a pending update to propagate to other replicas. This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates. --- solr/CHANGES.txt | 4 + .../org/apache/solr/schema/SchemaManager.java | 99 ++++++++++--------- 2 files changed, 59 insertions(+), 44 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 52112cd889b..da45f98968f 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -271,6 +271,10 @@ Bug Fixes * SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down. (Karl Wright, Mark Miller) +* SOLR-9847: Stop blocking further schema updates while waiting for a pending update to propagate to other replicas. + This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates. + (Mark Miller, Steve Rowe) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java index 33406318158..8c3b5f0b87b 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java @@ -88,9 +88,7 @@ public class SchemaManager { IndexSchema schema = req.getCore().getLatestSchema(); if (schema instanceof ManagedIndexSchema && schema.isMutable()) { - synchronized (schema.getSchemaUpdateLock()) { - return doOperations(ops); - } + return doOperations(ops); } else { return singletonList(singletonMap(CommandOperation.ERR_MSGS, "schema is not editable")); } @@ -107,52 +105,65 @@ public class SchemaManager { TimeOut timeOut = new TimeOut(timeout, TimeUnit.SECONDS); SolrCore core = req.getCore(); String errorMsg = "Unable to persist managed schema. "; - while (!timeOut.hasTimedOut()) { - managedIndexSchema = getFreshManagedSchema(req.getCore()); - for (CommandOperation op : operations) { - OpType opType = OpType.get(op.name); - if (opType != null) { - opType.perform(op, this); - } else { - op.addError("No such operation : " + op.name); - } - } - List errs = CommandOperation.captureErrors(operations); - if (!errs.isEmpty()) return errs; - SolrResourceLoader loader = req.getCore().getResourceLoader(); - if (loader instanceof ZkSolrResourceLoader) { - ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader) loader; - StringWriter sw = new StringWriter(); - try { - managedIndexSchema.persist(sw); - } catch (IOException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "unable to serialize schema"); - //unlikely - } + List errors = Collections.emptyList(); + int latestVersion = -1; - try { - int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(), - managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true); - req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName()); - waitForOtherReplicasToUpdate(timeOut, latestVersion); - return Collections.emptyList(); - } catch (ZkController.ResourceModifiedInZkException e) { - log.info("Schema was modified by another node. 
Retrying.."); + synchronized (req.getSchema().getSchemaUpdateLock()) { + while (!timeOut.hasTimedOut()) { + managedIndexSchema = getFreshManagedSchema(req.getCore()); + for (CommandOperation op : operations) { + OpType opType = OpType.get(op.name); + if (opType != null) { + opType.perform(op, this); + } else { + op.addError("No such operation : " + op.name); + } } - } else { - try { - //only for non cloud stuff - managedIndexSchema.persistManagedSchema(false); - core.setLatestSchema(managedIndexSchema); - return Collections.emptyList(); - } catch (SolrException e) { - log.warn(errorMsg); - return singletonList(errorMsg + e.getMessage()); + errors = CommandOperation.captureErrors(operations); + if (!errors.isEmpty()) break; + SolrResourceLoader loader = req.getCore().getResourceLoader(); + if (loader instanceof ZkSolrResourceLoader) { + ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader) loader; + StringWriter sw = new StringWriter(); + try { + managedIndexSchema.persist(sw); + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "unable to serialize schema"); + //unlikely + } + + try { + latestVersion = ZkController.persistConfigResourceToZooKeeper + (zkLoader, managedIndexSchema.getSchemaZkVersion(), managedIndexSchema.getResourceName(), + sw.toString().getBytes(StandardCharsets.UTF_8), true); + req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName()); + break; + } catch (ZkController.ResourceModifiedInZkException e) { + log.info("Schema was modified by another node. Retrying.."); + } + } else { + try { + //only for non cloud stuff + managedIndexSchema.persistManagedSchema(false); + core.setLatestSchema(managedIndexSchema); + } catch (SolrException e) { + log.warn(errorMsg); + errors = singletonList(errorMsg + e.getMessage()); + } + break; } } } - log.warn(errorMsg + "Timed out."); - return singletonList(errorMsg + "Timed out."); + if (req.getCore().getResourceLoader() instanceof ZkSolrResourceLoader) { + // Don't block further schema updates while waiting for a pending update to propagate to other replicas. + // This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates. 
+ waitForOtherReplicasToUpdate(timeOut, latestVersion); + } + if (errors.isEmpty() && timeOut.hasTimedOut()) { + log.warn(errorMsg + "Timed out."); + errors = singletonList(errorMsg + "Timed out."); + } + return errors; } private void waitForOtherReplicasToUpdate(TimeOut timeOut, int latestVersion) { From b5cfb17bd0d56da03dbe1f179db0f03ea0acf735 Mon Sep 17 00:00:00 2001 From: Ishan Chattopadhyaya Date: Wed, 21 Dec 2016 00:42:16 +0530 Subject: [PATCH 02/83] SOLR-9513: Fix test failure on Windows and Java9 by avoiding NPE in tearDownClass() --- .../security/hadoop/TestSolrCloudWithHadoopAuthPlugin.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/security/hadoop/TestSolrCloudWithHadoopAuthPlugin.java b/solr/core/src/test/org/apache/solr/security/hadoop/TestSolrCloudWithHadoopAuthPlugin.java index 960fd9ab5ef..6ac9403cb62 100644 --- a/solr/core/src/test/org/apache/solr/security/hadoop/TestSolrCloudWithHadoopAuthPlugin.java +++ b/solr/core/src/test/org/apache/solr/security/hadoop/TestSolrCloudWithHadoopAuthPlugin.java @@ -59,7 +59,9 @@ public class TestSolrCloudWithHadoopAuthPlugin extends SolrCloudTestCase { System.clearProperty("solr.kerberos.keytab"); System.clearProperty("solr.kerberos.name.rules"); System.clearProperty("solr.jaas.debug"); - kerberosTestServices.stop(); + if (kerberosTestServices != null) { + kerberosTestServices.stop(); + } kerberosTestServices = null; } From f1e636f5611abea66efed896a95eebbb6d765300 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Wed, 21 Dec 2016 00:01:27 +0300 Subject: [PATCH 03/83] SOLR-9878: fix ReversedWildcardFilterFactory caching in query parser --- solr/CHANGES.txt | 1 + .../solr/parser/SolrQueryParserBase.java | 2 +- .../TestReversedWildcardFilterFactory.java | 44 ++++++++++++++++++- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index da45f98968f..c050201a2ab 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -322,6 +322,7 @@ Other Changes * SOLR-9874: Solr will reject CREATEALIAS requests if target collections don't exist (Tomás Fernández Löbbe) +* SOLR-9878: fixing lazy logic for retrieving ReversedWildcardFilterFactory in SolrQueryParserBase (Mikhail Khludnev) ================== 6.3.0 ================== diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 0f6c7ac3475..168bd494fdf 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -787,7 +787,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) { if (leadingWildcards == null) leadingWildcards = new HashMap<>(); ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType); - if (fac != null || leadingWildcards.containsKey(fac)) { + if (fac != null || leadingWildcards.containsKey(fieldType)) { return fac; } diff --git a/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java b/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java index 269a2c55089..3ccc352d46d 100644 --- a/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java +++ b/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java @@ -15,6 +15,11 @@ * limitations under the License. 
*/ package org.apache.solr.analysis; +import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -26,18 +31,23 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.parser.CharStream; +import org.apache.solr.parser.ParseException; +import org.apache.solr.parser.SolrQueryParserBase; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchemaFactory; import org.apache.solr.search.QParser; import org.apache.solr.search.SolrQueryParser; +import org.apache.solr.search.SyntaxError; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import static org.apache.lucene.analysis.BaseTokenStreamTestCase.*; public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 { + Map args = new HashMap<>(); IndexSchema schema; @@ -183,4 +193,36 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 { req("+id:1 +one:*omez*"), "//result[@numFound=1]"); } + + private static final class SolrQParser extends SolrQueryParserBase { + @Override + public Query TopLevelQuery(String field) throws ParseException, SyntaxError { + return null; + } + + @Override + public void ReInit(CharStream stream) {} + + @Override + protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) { + return super.getReversedWildcardFilterFactory(fieldType); + } + } + + @Test + public void testCachingInQueryParser() { + SolrQParser parser = new SolrQParser(); + + SolrQueryRequest req = req(); + String[] fields = new String[]{"one", "two", "three"}; + String aField = fields[random().nextInt(fields.length)]; + FieldType type = req.getSchema().getField(aField).getType(); + + FieldType typeSpy = spy(type); + // calling twice + parser.getReversedWildcardFilterFactory(typeSpy); + parser.getReversedWildcardFilterFactory(typeSpy); + // but it should reach only once + verify(typeSpy, times(1)).getIndexAnalyzer(); + } } From febe0019a1e5ad2a0d38b8cf000bcf6f3abfa760 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Wed, 21 Dec 2016 00:40:56 +0300 Subject: [PATCH 04/83] SOLR-9760: solr.cmd doesn't need write permission in current directory --- solr/CHANGES.txt | 2 ++ solr/bin/solr.cmd | 15 +++------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c050201a2ab..887f45fd0be 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -275,6 +275,8 @@ Bug Fixes This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates. 
(Mark Miller, Steve Rowe) +* SOLR-9760: Windows script doesn't need write permission (Alex Crome by Mikhail Khludnev) + Other Changes ---------------------- diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index 80f2bbc1a62..4b6081fd50c 100644 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -1631,11 +1631,7 @@ set JAVA_MAJOR_VERSION=0 set JAVA_VERSION_INFO= set JAVA_BUILD=0 -"%JAVA%" -version 2>&1 | findstr /i "version" > javavers -set /p JAVAVEROUT=^&1 ^| findstr "version"^"`) do ( set JAVA_VERSION_INFO=%%a REM Remove surrounding quotes set JAVA_VERSION_INFO=!JAVA_VERSION_INFO:"=! @@ -1656,13 +1652,8 @@ GOTO :eof REM Set which JVM vendor we have :resolve_java_vendor -set "JAVA_VENDOR=Oracle" -"%JAVA%" -version 2>&1 | findstr /i "IBM J9" > javares -set /p JAVA_VENDOR_OUT=&1 | findstr /i "IBM J9" > nul +if %ERRORLEVEL% == 1 ( set "JAVA_VENDOR=Oracle" ) else ( set "JAVA_VENDOR=IBM J9" ) set JAVA_VENDOR_OUT= GOTO :eof From 19530fa6472876de41b5d73b6a6ccc8d4feae14c Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 21 Dec 2016 05:34:46 -0500 Subject: [PATCH 05/83] remove stale comment --- lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 96575780b44..bf360d3502a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -544,7 +544,6 @@ public class BKDWriter implements Closeable { MergeReader reader = queue.top(); // System.out.println("iter reader=" + reader); - // NOTE: doesn't work with subclasses (e.g. SimpleText!) oneDimWriter.add(reader.state.scratchPackedValue, reader.docID); if (reader.next()) { From 80462df86efaf093fdebeac5eef1727c5fda968f Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Wed, 21 Dec 2016 12:36:18 +0100 Subject: [PATCH 06/83] SOLR-9805 Don't use FileDescriptorRatioGauge - internally it uses reflection and doesn't work under Java 9. Instead use this opportunity to implement a more detailed OperatingSystemMetricSet. Add a unit test. Simplify some of the metric names. --- .../metrics/OperatingSystemMetricSet.java | 80 +++++++++++++++++++ .../solr/servlet/SolrDispatchFilter.java | 8 +- .../apache/solr/metrics/JvmMetricsTest.java | 67 ++++++++++++++++ 3 files changed, 151 insertions(+), 4 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java create mode 100644 solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java diff --git a/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java b/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java new file mode 100644 index 00000000000..b26386a2a1d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.metrics; + +import javax.management.JMException; +import javax.management.MBeanServer; +import javax.management.ObjectName; +import java.lang.invoke.MethodHandles; +import java.util.HashMap; +import java.util.Map; + +import com.codahale.metrics.JmxAttributeGauge; +import com.codahale.metrics.Metric; +import com.codahale.metrics.MetricSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is an extended replacement for {@link com.codahale.metrics.jvm.FileDescriptorRatioGauge} + * - that class uses reflection and doesn't work under Java 9. We can also get much more + * information about OS environment once we have to go through MBeanServer anyway. + */ +public class OperatingSystemMetricSet implements MetricSet { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** Metric names - these correspond to known numeric MBean attributes. Depending on the OS and + * Java implementation only some of them may be actually present. + */ + public static final String[] METRICS = { + "AvailableProcessors", + "CommittedVirtualMemorySize", + "FreePhysicalMemorySize", + "FreeSwapSpaceSize", + "MaxFileDescriptorCount", + "OpenFileDescriptorCount", + "ProcessCpuLoad", + "ProcessCpuTime", + "SystemLoadAverage", + "TotalPhysicalMemorySize", + "TotalSwapSpaceSize" + }; + + private final MBeanServer mBeanServer; + + public OperatingSystemMetricSet(MBeanServer mBeanServer) { + this.mBeanServer = mBeanServer; + } + + @Override + public Map getMetrics() { + final Map metrics = new HashMap<>(); + + try { + final ObjectName on = new ObjectName("java.lang:type=OperatingSystem"); + // verify that it exists + mBeanServer.getMBeanInfo(on); + for (String metric : METRICS) { + metrics.put(metric, new JmxAttributeGauge(mBeanServer, on, metric)); + } + } catch (JMException ignored) { + log.debug("Unable to load OperatingSystem MBean", ignored); + } + + return metrics; + } +} diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java index dbc4b35244a..a411bb34f2b 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java @@ -49,7 +49,6 @@ import java.util.regex.Pattern; import com.codahale.metrics.jvm.BufferPoolMetricSet; import com.codahale.metrics.jvm.ClassLoadingGaugeSet; -import com.codahale.metrics.jvm.FileDescriptorRatioGauge; import com.codahale.metrics.jvm.GarbageCollectorMetricSet; import com.codahale.metrics.jvm.MemoryUsageGaugeSet; import com.codahale.metrics.jvm.ThreadStatesGaugeSet; @@ -69,6 +68,7 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrXmlConfig; +import org.apache.solr.metrics.OperatingSystemMetricSet; import org.apache.solr.metrics.SolrMetricManager; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.security.AuthenticationPlugin; @@ -187,9 +187,9 @@ public class 
SolrDispatchFilter extends BaseSolrFilter { SolrMetricManager metricManager = cores.getMetricManager(); try { String registry = SolrMetricManager.getRegistryName(SolrInfoMBean.Group.jvm); - metricManager.registerAll(registry, new BufferPoolMetricSet(platformMBeanServer), true, "bufferPools"); - metricManager.registerAll(registry, new ClassLoadingGaugeSet(), true, "classLoading"); - metricManager.register(registry, new FileDescriptorRatioGauge(), true, "fileDescriptorRatio"); + metricManager.registerAll(registry, new BufferPoolMetricSet(platformMBeanServer), true, "buffers"); + metricManager.registerAll(registry, new ClassLoadingGaugeSet(), true, "classes"); + metricManager.registerAll(registry, new OperatingSystemMetricSet(platformMBeanServer), true, "os"); metricManager.registerAll(registry, new GarbageCollectorMetricSet(), true, "gc"); metricManager.registerAll(registry, new MemoryUsageGaugeSet(), true, "memory"); metricManager.registerAll(registry, new ThreadStatesGaugeSet(), true, "threads"); // todo should we use CachedThreadStatesGaugeSet instead? diff --git a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java new file mode 100644 index 00000000000..77c4e1aa1c4 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.metrics; + +import javax.management.MBeanServer; +import java.lang.management.ManagementFactory; +import java.util.Map; + +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Metric; +import org.apache.solr.SolrJettyTestBase; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test {@link OperatingSystemMetricSet} and proper JVM metrics registration. 
+ */ +public class JvmMetricsTest extends SolrJettyTestBase { + + @BeforeClass + public static void beforeTest() throws Exception { + createJetty(legacyExampleCollection1SolrHome()); + } + + @Test + public void testOperatingSystemMetricsSet() throws Exception { + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + OperatingSystemMetricSet set = new OperatingSystemMetricSet(mBeanServer); + Map metrics = set.getMetrics(); + assertTrue(metrics.size() > 0); + for (String metric : OperatingSystemMetricSet.METRICS) { + Gauge gauge = (Gauge)metrics.get(metric); + if (gauge == null) { // some are optional depending on OS + continue; + } + double value = ((Number)gauge.getValue()).doubleValue(); + assertTrue(value >= 0); + } + } + + @Test + public void testSetupJvmMetrics() throws Exception { + SolrMetricManager metricManager = jetty.getCoreContainer().getMetricManager(); + Map metrics = metricManager.registry("solr.jvm").getMetrics(); + assertTrue(metrics.size() > 0); + assertTrue(metrics.toString(), metrics.entrySet().stream().filter(e -> e.getKey().startsWith("buffers.")).count() > 0); + assertTrue(metrics.toString(), metrics.entrySet().stream().filter(e -> e.getKey().startsWith("classes.")).count() > 0); + assertTrue(metrics.toString(), metrics.entrySet().stream().filter(e -> e.getKey().startsWith("os.")).count() > 0); + assertTrue(metrics.toString(), metrics.entrySet().stream().filter(e -> e.getKey().startsWith("gc.")).count() > 0); + assertTrue(metrics.toString(), metrics.entrySet().stream().filter(e -> e.getKey().startsWith("memory.")).count() > 0); + assertTrue(metrics.toString(), metrics.entrySet().stream().filter(e -> e.getKey().startsWith("threads.")).count() > 0); + } +} From 6bc001ba43035667e97dc7b5503ba80d20b142c8 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 21 Dec 2016 10:31:36 -0500 Subject: [PATCH 07/83] fix generics javac warnings --- .../apache/lucene/index/TrackingTmpOutputDirectoryWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/TrackingTmpOutputDirectoryWrapper.java b/lucene/core/src/java/org/apache/lucene/index/TrackingTmpOutputDirectoryWrapper.java index 032567416b7..28278401bc9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TrackingTmpOutputDirectoryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/TrackingTmpOutputDirectoryWrapper.java @@ -28,7 +28,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; final class TrackingTmpOutputDirectoryWrapper extends FilterDirectory { - private final Map fileNames = new HashMap(); + private final Map fileNames = new HashMap<>(); TrackingTmpOutputDirectoryWrapper(Directory in) { super(in); From d9529529eed9a084fe705820ccd11d12deb89b8b Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Wed, 21 Dec 2016 10:41:27 -0800 Subject: [PATCH 08/83] SOLR-9884: Add version to segments handler output --- solr/CHANGES.txt | 2 ++ .../admin/SegmentsInfoRequestHandler.java | 1 + .../admin/SegmentsInfoRequestHandlerTest.java | 17 +++++++++++++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 887f45fd0be..c32af127f42 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -191,6 +191,8 @@ New Features of which are multi-valued. 
Example: http://localhost:8983/solr/admin/metrics?group=jvm,jetty&type=counter (shalin) +* SOLR-9884: Add version to segments handler output (Steven Bower via Erick Erickson) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SegmentsInfoRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/SegmentsInfoRequestHandler.java index ee2cf5ea0e1..df1bbd8e9e6 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/SegmentsInfoRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/SegmentsInfoRequestHandler.java @@ -84,6 +84,7 @@ public class SegmentsInfoRequestHandler extends RequestHandlerBase { segmentInfoMap.add("age", new Date(timestamp)); segmentInfoMap.add("source", segmentCommitInfo.info.getDiagnostics().get("source")); + segmentInfoMap.add("version", segmentCommitInfo.info.getVersion().toString()); return segmentInfoMap; } diff --git a/solr/core/src/test/org/apache/solr/handler/admin/SegmentsInfoRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/SegmentsInfoRequestHandlerTest.java index 50333a21d2c..885e4198c78 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/SegmentsInfoRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/SegmentsInfoRequestHandlerTest.java @@ -16,6 +16,7 @@ */ package org.apache.solr.handler.admin; +import org.apache.lucene.util.Version; import org.apache.solr.util.AbstractSolrTestCase; import org.junit.Before; import org.junit.BeforeClass; @@ -32,6 +33,7 @@ public class SegmentsInfoRequestHandlerTest extends AbstractSolrTestCase { @BeforeClass public static void beforeClass() throws Exception { System.setProperty("enable.update.log", "false"); + System.setProperty("solr.tests.useMergePolicy", "false"); initCore("solrconfig.xml", "schema12.xml"); } @@ -44,6 +46,10 @@ public class SegmentsInfoRequestHandlerTest extends AbstractSolrTestCase { assertU(delI("SOLR100" + i)); } assertU(commit()); + for (int i = 0; i < DOC_COUNT; i++) { + assertU(adoc("id","SOLR200" + i, "name","Apache Solr:" + i)); + } + assertU(commit()); } @Test @@ -52,14 +58,21 @@ public class SegmentsInfoRequestHandlerTest extends AbstractSolrTestCase { req("qt","/admin/segments"), "0 Date: Wed, 21 Dec 2016 19:49:22 +0100 Subject: [PATCH 09/83] LUCENE-6989: Make MMapDirectory's unmap hack work with Java 9 EA (b150+): Unmapping uses new sun.misc.Unsafe#invokeCleaner(ByteBuffer). --- lucene/CHANGES.txt | 7 + .../apache/lucene/store/MMapDirectory.java | 124 ++++++++++-------- lucene/tools/junit4/tests.policy | 1 - 3 files changed, 78 insertions(+), 54 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 78fd158d962..618f73af9ce 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -153,6 +153,13 @@ Improvements CorruptIndexException instead of the more confusing EOFException (Mike Drob via Mike McCandless) +* LUCENE-6989: Make MMapDirectory's unmap hack work with Java 9 EA (b150+): + Unmapping uses new sun.misc.Unsafe#invokeCleaner(ByteBuffer). + Java 9 now needs same permissions like Java 8; + RuntimePermission("accessClassInPackage.jdk.internal.ref") + is no longer needed. Support for older Java 9 builds was removed. 
+ (Uwe Schindler) + Optimizations * LUCENE-7568: Optimize merging when index sorting is used but the diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java index be08a1663a6..0487400c776 100644 --- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java @@ -34,6 +34,7 @@ import java.util.Locale; import java.util.Objects; import java.util.concurrent.Future; import java.lang.invoke.MethodHandle; +import java.lang.reflect.Field; import java.lang.reflect.Method; import org.apache.lucene.store.ByteBufferGuard.BufferCleaner; @@ -174,14 +175,13 @@ public class MMapDirectory extends FSDirectory { * is closed while another thread is still accessing it (SIGSEGV). *
To enable the hack, the following requirements need to be
   * fulfilled: The used JVM must be Oracle Java / OpenJDK 8
-  * (preliminary support for Java 9 was added with Lucene 6).
+  * (preliminary support for Java 9 EA build 150+ was added with Lucene 6.4).
   * In addition, the following permissions need to be granted
   * to {@code lucene-core.jar} in your
   * policy file:
   * <ul>
   * <li>{@code permission java.lang.reflect.ReflectPermission "suppressAccessChecks";}</li>
   * <li>{@code permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";}</li>
-  * <li>{@code permission java.lang.RuntimePermission "accessClassInPackage.jdk.internal.ref";}</li>
   * </ul>
* @throws IllegalArgumentException if {@link #UNMAP_SUPPORTED} * is false and the workaround cannot be enabled. @@ -335,64 +335,82 @@ public class MMapDirectory extends FSDirectory { } } - @SuppressForbidden(reason = "Needs access to private APIs in DirectBuffer and sun.misc.Cleaner to enable hack") + @SuppressForbidden(reason = "Needs access to private APIs in DirectBuffer, sun.misc.Cleaner, and sun.misc.Unsafe to enable hack") private static Object unmapHackImpl() { final Lookup lookup = lookup(); try { - final Class directBufferClass = Class.forName("java.nio.DirectByteBuffer"); - - final Method m = directBufferClass.getMethod("cleaner"); - m.setAccessible(true); - MethodHandle directBufferCleanerMethod = lookup.unreflect(m); - Class cleanerClass = directBufferCleanerMethod.type().returnType(); - - final MethodHandle cleanMethod; - if (Runnable.class.isAssignableFrom(cleanerClass)) { - // early Java 9 impl using Runnable (we do the security check early that the Runnable does at runtime): - final SecurityManager sm = System.getSecurityManager(); - if (sm != null) { - sm.checkPackageAccess("jdk.internal.ref"); - } - // cast return value of cleaner() to Runnable: - directBufferCleanerMethod = directBufferCleanerMethod.asType(directBufferCleanerMethod.type().changeReturnType(Runnable.class)); - cleanerClass = Runnable.class; - // lookup run() method on the interface instead of Cleaner: - cleanMethod = lookup.findVirtual(cleanerClass, "run", methodType(void.class)); - } else { - // can be either the old internal "sun.misc.Cleaner" or - // the new Java 9 "java.lang.ref.Cleaner$Cleanable": - cleanMethod = lookup.findVirtual(cleanerClass, "clean", methodType(void.class)); + try { + // *** sun.misc.Unsafe unmapping (Java 9+) *** + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + // first check if Unsafe has the right method, otherwise we can give up + // without doing any security critical stuff: + final MethodHandle unmapper = lookup.findVirtual(unsafeClass, "invokeCleaner", + methodType(void.class, ByteBuffer.class)); + // fetch the unsafe instance and bind it to the virtual MH: + final Field f = unsafeClass.getDeclaredField("theUnsafe"); + f.setAccessible(true); + final Object theUnsafe = f.get(null); + return newBufferCleaner(ByteBuffer.class, unmapper.bindTo(theUnsafe)); + } catch (SecurityException se) { + // rethrow to report errors correctly (we need to catch it here, as we also catch RuntimeException below!): + throw se; + } catch (ReflectiveOperationException | RuntimeException e) { + // *** sun.misc.Cleaner unmapping (Java 8) *** + final Class directBufferClass = Class.forName("java.nio.DirectByteBuffer"); + + final Method m = directBufferClass.getMethod("cleaner"); + m.setAccessible(true); + final MethodHandle directBufferCleanerMethod = lookup.unreflect(m); + final Class cleanerClass = directBufferCleanerMethod.type().returnType(); + + /* "Compile" a MH that basically is equivalent to the following code: + * void unmapper(ByteBuffer byteBuffer) { + * sun.misc.Cleaner cleaner = ((java.nio.DirectByteBuffer) byteBuffer).cleaner(); + * if (Objects.nonNull(cleaner)) { + * cleaner.clean(); + * } else { + * noop(cleaner); // the noop is needed because MethodHandles#guardWithTest always needs ELSE + * } + * } + */ + final MethodHandle cleanMethod = lookup.findVirtual(cleanerClass, "clean", methodType(void.class)); + final MethodHandle nonNullTest = lookup.findStatic(Objects.class, "nonNull", methodType(boolean.class, Object.class)) + .asType(methodType(boolean.class, 
cleanerClass)); + final MethodHandle noop = dropArguments(constant(Void.class, null).asType(methodType(void.class)), 0, cleanerClass); + final MethodHandle unmapper = filterReturnValue(directBufferCleanerMethod, guardWithTest(nonNullTest, cleanMethod, noop)) + .asType(methodType(void.class, ByteBuffer.class)); + return newBufferCleaner(directBufferClass, unmapper); } - - final MethodHandle nonNullTest = lookup.findStatic(Objects.class, "nonNull", methodType(boolean.class, Object.class)) - .asType(methodType(boolean.class, cleanerClass)); - final MethodHandle noop = dropArguments(constant(Void.class, null).asType(methodType(void.class)), 0, cleanerClass); - final MethodHandle unmapper = filterReturnValue(directBufferCleanerMethod, guardWithTest(nonNullTest, cleanMethod, noop)) - .asType(methodType(void.class, ByteBuffer.class)); - - return (BufferCleaner) (String resourceDescription, ByteBuffer buffer) -> { - if (directBufferClass.isInstance(buffer)) { - final Throwable error = AccessController.doPrivileged((PrivilegedAction) () -> { - try { - unmapper.invokeExact(buffer); - return null; - } catch (Throwable t) { - return t; - } - }); - if (error != null) { - throw new IOException("Unable to unmap the mapped buffer: " + resourceDescription, error); - } - } - }; - } catch (SecurityException e) { - return "Unmapping is not supported, because not all required permissions are given to the Lucene JAR file: " + e + - " [Please grant at least the following permissions: RuntimePermission(\"accessClassInPackage.sun.misc\"), " + - "RuntimePermission(\"accessClassInPackage.jdk.internal.ref\"), and " + - "ReflectPermission(\"suppressAccessChecks\")]"; + } catch (SecurityException se) { + return "Unmapping is not supported, because not all required permissions are given to the Lucene JAR file: " + se + + " [Please grant at least the following permissions: RuntimePermission(\"accessClassInPackage.sun.misc\") " + + " and ReflectPermission(\"suppressAccessChecks\")]"; } catch (ReflectiveOperationException | RuntimeException e) { return "Unmapping is not supported on this platform, because internal Java APIs are not compatible to this Lucene version: " + e; } } + private static BufferCleaner newBufferCleaner(final Class unmappableBufferClass, final MethodHandle unmapper) { + assert Objects.equals(methodType(void.class, ByteBuffer.class), unmapper.type()); + return (String resourceDescription, ByteBuffer buffer) -> { + if (!buffer.isDirect()) { + throw new IllegalArgumentException("unmapping only works with direct buffers"); + } + if (!unmappableBufferClass.isInstance(buffer)) { + throw new IllegalArgumentException("buffer is not an instance of " + unmappableBufferClass.getName()); + } + final Throwable error = AccessController.doPrivileged((PrivilegedAction) () -> { + try { + unmapper.invokeExact(buffer); + return null; + } catch (Throwable t) { + return t; + } + }); + if (error != null) { + throw new IOException("Unable to unmap the mapped buffer: " + resourceDescription, error); + } + }; + } + } diff --git a/lucene/tools/junit4/tests.policy b/lucene/tools/junit4/tests.policy index 2dde5c6f329..b351b172c26 100644 --- a/lucene/tools/junit4/tests.policy +++ b/lucene/tools/junit4/tests.policy @@ -63,7 +63,6 @@ grant { permission java.lang.RuntimePermission "createClassLoader"; // needed to test unmap hack on platforms that support it permission java.lang.RuntimePermission "accessClassInPackage.sun.misc"; - permission java.lang.RuntimePermission "accessClassInPackage.jdk.internal.ref"; // needed by cyberneko 
usage by benchmarks on J9 permission java.lang.RuntimePermission "accessClassInPackage.org.apache.xerces.util"; // needed by jacoco to dump coverage From 5020ea28bc4255de473e795a6638ae67f2720396 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 21 Dec 2016 19:25:54 +0100 Subject: [PATCH 10/83] LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and Function interfaces. --- lucene/CHANGES.txt | 3 + .../analysis/core/TestRandomChains.java | 177 +++++------------- 2 files changed, 45 insertions(+), 135 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 618f73af9ce..2e2f9ab0861 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -190,6 +190,9 @@ Other * LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to be customized. (David Smiley) +* LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and + Function interfaces. (Ahmet Arslan via Adrien Grand) + Build * LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index 94924d30beb..0bd5e0a3f8e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -45,6 +45,8 @@ import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; +import java.util.function.Function; +import java.util.function.Predicate; import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; @@ -106,15 +108,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { static List> tokenfilters; static List> charfilters; - private static interface Predicate { - boolean apply(T o); - } - - private static final Predicate ALWAYS = new Predicate() { - public boolean apply(Object[] args) { - return true; - }; - }; + private static final Predicate ALWAYS = (objects -> true); private static final Map,Predicate> brokenConstructors = new HashMap<>(); static { @@ -124,36 +118,27 @@ public class TestRandomChains extends BaseTokenStreamTestCase { ALWAYS); brokenConstructors.put( LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), - new Predicate() { - @Override - public boolean apply(Object[] args) { + args -> { assert args.length == 3; return !((Boolean) args[2]); // args are broken if consumeAllTokens is false - } }); brokenConstructors.put( LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class), ALWAYS); brokenConstructors.put( LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), - new Predicate() { - @Override - public boolean apply(Object[] args) { + args -> { assert args.length == 3; return !((Boolean) args[2]); // args are broken if consumeAllTokens is false - } }); brokenConstructors.put( LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class), ALWAYS); brokenConstructors.put( LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), - new Predicate() { - @Override - public boolean apply(Object[] args) { + args -> { assert args.length == 3; return !((Boolean) args[2]); // args are broken if consumeAllTokens is false - } }); for (Class c : Arrays.>asList( // TODO: can we promote some of these to be only @@ -247,12 +232,7 @@ public class 
TestRandomChains extends BaseTokenStreamTestCase { } } - final Comparator> ctorComp = new Comparator>() { - @Override - public int compare(Constructor arg0, Constructor arg1) { - return arg0.toGenericString().compareTo(arg1.toGenericString()); - } - }; + final Comparator> ctorComp = (arg0, arg1) -> arg0.toGenericString().compareTo(arg1.toGenericString()); Collections.sort(tokenizers, ctorComp); Collections.sort(tokenfilters, ctorComp); Collections.sort(charfilters, ctorComp); @@ -318,21 +298,14 @@ public class TestRandomChains extends BaseTokenStreamTestCase { } } - private static interface ArgProducer { - Object create(Random random); - } - - private static final Map,ArgProducer> argProducers = new IdentityHashMap,ArgProducer>() {{ - put(int.class, new ArgProducer() { - @Override public Object create(Random random) { + private static final Map,Function> argProducers = new IdentityHashMap,Function>() {{ + put(int.class, random -> { // TODO: could cause huge ram usage to use full int range for some filters // (e.g. allocate enormous arrays) // return Integer.valueOf(random.nextInt()); return Integer.valueOf(TestUtil.nextInt(random, -50, 50)); - } }); - put(char.class, new ArgProducer() { - @Override public Object create(Random random) { + put(char.class, random -> { // TODO: fix any filters that care to throw IAE instead. // also add a unicode validating filter to validate termAtt? // return Character.valueOf((char)random.nextInt(65536)); @@ -342,49 +315,19 @@ public class TestRandomChains extends BaseTokenStreamTestCase { return Character.valueOf(c); } } - } }); - put(float.class, new ArgProducer() { - @Override public Object create(Random random) { - return Float.valueOf(random.nextFloat()); - } - }); - put(boolean.class, new ArgProducer() { - @Override public Object create(Random random) { - return Boolean.valueOf(random.nextBoolean()); - } - }); - put(byte.class, new ArgProducer() { - @Override public Object create(Random random) { - // this wraps to negative when casting to byte - return Byte.valueOf((byte) random.nextInt(256)); - } - }); - put(byte[].class, new ArgProducer() { - @Override public Object create(Random random) { + put(float.class, Random::nextFloat); + put(boolean.class, Random::nextBoolean); + put(byte.class, random -> (byte) random.nextInt(256)); + put(byte[].class, random -> { byte bytes[] = new byte[random.nextInt(256)]; random.nextBytes(bytes); return bytes; - } }); - put(Random.class, new ArgProducer() { - @Override public Object create(Random random) { - return new Random(random.nextLong()); - } - }); - put(Version.class, new ArgProducer() { - @Override public Object create(Random random) { - // we expect bugs in emulating old versions - return Version.LATEST; - } - }); - put(AttributeFactory.class, new ArgProducer() { - @Override public Object create(Random random) { - return newAttributeFactory(random); - } - }); - put(Set.class, new ArgProducer() { - @Override public Object create(Random random) { + put(Random.class, random -> new Random(random.nextLong())); + put(Version.class, random -> Version.LATEST); + put(AttributeFactory.class, BaseTokenStreamTestCase::newAttributeFactory); + put(Set.class,random -> { // TypeTokenFilter Set set = new HashSet<>(); int num = random.nextInt(5); @@ -392,10 +335,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]); } return set; - } }); - put(Collection.class, new ArgProducer() { - @Override public Object create(Random 
random) { + put(Collection.class, random -> { // CapitalizationFilter Collection col = new ArrayList<>(); int num = random.nextInt(5); @@ -403,10 +344,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { col.add(TestUtil.randomSimpleString(random).toCharArray()); } return col; - } }); - put(CharArraySet.class, new ArgProducer() { - @Override public Object create(Random random) { + put(CharArraySet.class, random -> { int num = random.nextInt(10); CharArraySet set = new CharArraySet(num, random.nextBoolean()); for (int i = 0; i < num; i++) { @@ -414,28 +353,13 @@ public class TestRandomChains extends BaseTokenStreamTestCase { set.add(TestUtil.randomSimpleString(random)); } return set; - } }); - put(Pattern.class, new ArgProducer() { - @Override public Object create(Random random) { - // TODO: don't want to make the exponentially slow ones Dawid documents - // in TestPatternReplaceFilter, so dont use truly random patterns (for now) - return Pattern.compile("a"); - } - }); - - put(Pattern[].class, new ArgProducer() { - @Override public Object create(Random random) { - return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")}; - } - }); - put(PayloadEncoder.class, new ArgProducer() { - @Override public Object create(Random random) { - return new IdentityEncoder(); // the other encoders will throw exceptions if tokens arent numbers? - } - }); - put(Dictionary.class, new ArgProducer() { - @Override public Object create(Random random) { + // TODO: don't want to make the exponentially slow ones Dawid documents + // in TestPatternReplaceFilter, so dont use truly random patterns (for now) + put(Pattern.class, random -> Pattern.compile("a")); + put(Pattern[].class, random -> new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")}); + put(PayloadEncoder.class, random -> new IdentityEncoder()); // the other encoders will throw exceptions if tokens arent numbers? + put(Dictionary.class, random -> { // TODO: make nastier InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff"); InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic"); @@ -445,10 +369,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { Rethrow.rethrow(ex); return null; // unreachable code } - } }); - put(HyphenationTree.class, new ArgProducer() { - @Override public Object create(Random random) { + put(HyphenationTree.class, random -> { // TODO: make nastier try { InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm()); @@ -458,10 +380,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { Rethrow.rethrow(ex); return null; // unreachable code } - } }); - put(SnowballProgram.class, new ArgProducer() { - @Override public Object create(Random random) { + put(SnowballProgram.class, random -> { try { String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)]; Class clazz = Class.forName("org.tartarus.snowball.ext." 
+ lang + "Stemmer").asSubclass(SnowballProgram.class); @@ -470,10 +390,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { Rethrow.rethrow(ex); return null; // unreachable code } - } }); - put(String.class, new ArgProducer() { - @Override public Object create(Random random) { + put(String.class, random -> { // TODO: make nastier if (random.nextBoolean()) { // a token type @@ -481,10 +399,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { } else { return TestUtil.randomSimpleString(random); } - } }); - put(NormalizeCharMap.class, new ArgProducer() { - @Override public Object create(Random random) { + put(NormalizeCharMap.class, random -> { NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); // we can't add duplicate keys, or NormalizeCharMap gets angry Set keys = new HashSet<>(); @@ -500,10 +416,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { } } return builder.build(); - } }); - put(CharacterRunAutomaton.class, new ArgProducer() { - @Override public Object create(Random random) { + put(CharacterRunAutomaton.class, random -> { // TODO: could probably use a purely random automaton switch(random.nextInt(5)) { case 0: return MockTokenizer.KEYWORD; @@ -512,10 +426,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { case 3: return MockTokenFilter.EMPTY_STOPSET; default: return MockTokenFilter.ENGLISH_STOPSET; } - } }); - put(CharArrayMap.class, new ArgProducer() { - @Override public Object create(Random random) { + put(CharArrayMap.class, random -> { int num = random.nextInt(10); CharArrayMap map = new CharArrayMap<>(num, random.nextBoolean()); for (int i = 0; i < num; i++) { @@ -523,10 +435,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random)); } return map; - } }); - put(StemmerOverrideMap.class, new ArgProducer() { - @Override public Object create(Random random) { + put(StemmerOverrideMap.class, random -> { int num = random.nextInt(10); StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean()); for (int i = 0; i < num; i++) { @@ -545,11 +455,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase { } catch (Exception ex) { Rethrow.rethrow(ex); return null; // unreachable code - } } }); - put(SynonymMap.class, new ArgProducer() { - @Override public Object create(Random random) { + put(SynonymMap.class, new Function() { + @Override public Object apply(Random random) { SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean()); final int numEntries = atLeast(10); for (int j = 0; j < numEntries; j++) { @@ -578,12 +487,9 @@ public class TestRandomChains extends BaseTokenStreamTestCase { } } }); - put(DateFormat.class, new ArgProducer() { - @Override - public Object create(Random random) { + put(DateFormat.class, random -> { if (random.nextBoolean()) return null; return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random)); - } }); }}; @@ -608,9 +514,9 @@ public class TestRandomChains extends BaseTokenStreamTestCase { @SuppressWarnings("unchecked") static T newRandomArg(Random random, Class paramType) { - final ArgProducer producer = argProducers.get(paramType); + final Function producer = argProducers.get(paramType); assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer); - return (T) producer.create(random); + return (T) producer.apply(random); } static Object[] newTokenizerArgs(Random random, Class[] 
paramTypes) { @@ -707,7 +613,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { sb.append("filters="); sb.append(tokenFilterSpec.toString); sb.append("\n"); - sb.append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect); + sb.append("offsetsAreCorrect="); + sb.append(tokenFilterSpec.offsetsAreCorrect); return sb.toString(); } @@ -745,12 +652,12 @@ public class TestRandomChains extends BaseTokenStreamTestCase { private boolean broken(Constructor ctor, Object[] args) { final Predicate pred = brokenConstructors.get(ctor); - return pred != null && pred.apply(args); + return pred != null && pred.test(args); } private boolean brokenOffsets(Constructor ctor, Object[] args) { final Predicate pred = brokenOffsetsConstructors.get(ctor); - return pred != null && pred.apply(args); + return pred != null && pred.test(args); } // create a new random tokenizer from classpath From 18d53a43f7b4536572ef98a94ec1d9b529084d1b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 21 Dec 2016 19:33:52 +0100 Subject: [PATCH 11/83] LUCENE-7594: Fixed point range queries on floating-point types to recommend using helpers for exclusive bounds that are consistent with Double.compare. --- lucene/CHANGES.txt | 4 +++ .../apache/lucene/document/DoublePoint.java | 30 +++++++++++++++++-- .../apache/lucene/document/FloatPoint.java | 30 +++++++++++++++++-- .../lucene/search/TestPointQueries.java | 28 +++++++++++++++++ .../lucene/document/TestHalfFloatPoint.java | 2 ++ 5 files changed, 90 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2e2f9ab0861..912974da3fc 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -109,6 +109,10 @@ Bug Fixes there are too many merges running and one of the merges hits a tragic exception (Joey Echeverria via Mike McCandless) +* LUCENE-7594: Fixed point range queries on floating-point types to recommend + using helpers for exclusive bounds that are consistent with Double.compare. + (Adrien Grand, Dawid Weiss) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/core/src/java/org/apache/lucene/document/DoublePoint.java b/lucene/core/src/java/org/apache/lucene/document/DoublePoint.java index 9a383a489c0..6547402a265 100644 --- a/lucene/core/src/java/org/apache/lucene/document/DoublePoint.java +++ b/lucene/core/src/java/org/apache/lucene/document/DoublePoint.java @@ -45,6 +45,32 @@ import org.apache.lucene.util.NumericUtils; */ public final class DoublePoint extends Field { + /** + * Return the least double that compares greater than {@code d} consistently + * with {@link Double#compare}. The only difference with + * {@link Math#nextUp(double)} is that this method returns {@code +0d} when + * the argument is {@code -0d}. + */ + public static double nextUp(double d) { + if (Double.doubleToLongBits(d) == 0x8000_0000_0000_0000L) { // -0d + return +0d; + } + return Math.nextUp(d); + } + + /** + * Return the greatest double that compares less than {@code d} consistently + * with {@link Double#compare}. The only difference with + * {@link Math#nextDown(double)} is that this method returns {@code -0d} when + * the argument is {@code +0d}. + */ + public static double nextDown(double d) { + if (Double.doubleToLongBits(d) == 0L) { // +0d + return -0f; + } + return Math.nextDown(d); + } + private static FieldType getType(int numDims) { FieldType type = new FieldType(); type.setDimensions(numDims, Double.BYTES); @@ -164,8 +190,8 @@ public final class DoublePoint extends Field { *
   * You can have half-open ranges (which are in fact </≤ or >/≥ queries)
   * by setting {@code lowerValue = Double.NEGATIVE_INFINITY} or {@code upperValue = Double.POSITIVE_INFINITY}.
-  * Ranges are inclusive. For exclusive ranges, pass {@code Math#nextUp(lowerValue)}
-  * or {@code Math.nextDown(upperValue)}.
+  * Ranges are inclusive. For exclusive ranges, pass {@link #nextUp(double) nextUp(lowerValue)}
+  * or {@link #nextUp(double) nextDown(upperValue)}.
* Range comparisons are consistent with {@link Double#compareTo(Double)}. * diff --git a/lucene/core/src/java/org/apache/lucene/document/FloatPoint.java b/lucene/core/src/java/org/apache/lucene/document/FloatPoint.java index 8d84269d2e1..0ec67fd746c 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FloatPoint.java +++ b/lucene/core/src/java/org/apache/lucene/document/FloatPoint.java @@ -45,6 +45,32 @@ import org.apache.lucene.util.NumericUtils; */ public final class FloatPoint extends Field { + /** + * Return the least float that compares greater than {@code f} consistently + * with {@link Float#compare}. The only difference with + * {@link Math#nextUp(float)} is that this method returns {@code +0f} when + * the argument is {@code -0f}. + */ + public static float nextUp(float f) { + if (Float.floatToIntBits(f) == 0x8000_0000) { // -0f + return +0f; + } + return Math.nextUp(f); + } + + /** + * Return the greatest float that compares less than {@code f} consistently + * with {@link Float#compare}. The only difference with + * {@link Math#nextDown(float)} is that this method returns {@code -0f} when + * the argument is {@code +0f}. + */ + public static float nextDown(float f) { + if (Float.floatToIntBits(f) == 0) { // +0f + return -0f; + } + return Math.nextDown(f); + } + private static FieldType getType(int numDims) { FieldType type = new FieldType(); type.setDimensions(numDims, Float.BYTES); @@ -164,8 +190,8 @@ public final class FloatPoint extends Field { *
   * You can have half-open ranges (which are in fact </≤ or >/≥ queries)
   * by setting {@code lowerValue = Float.NEGATIVE_INFINITY} or {@code upperValue = Float.POSITIVE_INFINITY}.
-  * Ranges are inclusive. For exclusive ranges, pass {@code Math#nextUp(lowerValue)}
-  * or {@code Math.nextDown(upperValue)}.
+  * Ranges are inclusive. For exclusive ranges, pass {@link #nextUp(float) nextUp(lowerValue)}
+  * or {@link #nextUp(float) nextDown(upperValue)}.
* Range comparisons are consistent with {@link Float#compareTo(Float)}. * diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index 73b28139f9e..5c66478c981 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -2052,4 +2052,32 @@ public class TestPointQueries extends LuceneTestCase { }); assertEquals("lowerPoint has length=4 but upperPoint has different length=8", e.getMessage()); } + + public void testNextUp() { + assertTrue(Double.compare(0d, DoublePoint.nextUp(-0d)) == 0); + assertTrue(Double.compare(Double.MIN_VALUE, DoublePoint.nextUp(0d)) == 0); + assertTrue(Double.compare(Double.POSITIVE_INFINITY, DoublePoint.nextUp(Double.MAX_VALUE)) == 0); + assertTrue(Double.compare(Double.POSITIVE_INFINITY, DoublePoint.nextUp(Double.POSITIVE_INFINITY)) == 0); + assertTrue(Double.compare(-Double.MAX_VALUE, DoublePoint.nextUp(Double.NEGATIVE_INFINITY)) == 0); + + assertTrue(Float.compare(0f, FloatPoint.nextUp(-0f)) == 0); + assertTrue(Float.compare(Float.MIN_VALUE, FloatPoint.nextUp(0f)) == 0); + assertTrue(Float.compare(Float.POSITIVE_INFINITY, FloatPoint.nextUp(Float.MAX_VALUE)) == 0); + assertTrue(Float.compare(Float.POSITIVE_INFINITY, FloatPoint.nextUp(Float.POSITIVE_INFINITY)) == 0); + assertTrue(Float.compare(-Float.MAX_VALUE, FloatPoint.nextUp(Float.NEGATIVE_INFINITY)) == 0); + } + + public void testNextDown() { + assertTrue(Double.compare(-0d, DoublePoint.nextDown(0d)) == 0); + assertTrue(Double.compare(-Double.MIN_VALUE, DoublePoint.nextDown(-0d)) == 0); + assertTrue(Double.compare(Double.NEGATIVE_INFINITY, DoublePoint.nextDown(-Double.MAX_VALUE)) == 0); + assertTrue(Double.compare(Double.NEGATIVE_INFINITY, DoublePoint.nextDown(Double.NEGATIVE_INFINITY)) == 0); + assertTrue(Double.compare(Double.MAX_VALUE, DoublePoint.nextDown(Double.POSITIVE_INFINITY)) == 0); + + assertTrue(Float.compare(-0f, FloatPoint.nextDown(0f)) == 0); + assertTrue(Float.compare(-Float.MIN_VALUE, FloatPoint.nextDown(-0f)) == 0); + assertTrue(Float.compare(Float.NEGATIVE_INFINITY, FloatPoint.nextDown(-Float.MAX_VALUE)) == 0); + assertTrue(Float.compare(Float.NEGATIVE_INFINITY, FloatPoint.nextDown(Float.NEGATIVE_INFINITY)) == 0); + assertTrue(Float.compare(Float.MAX_VALUE, FloatPoint.nextDown(Float.POSITIVE_INFINITY)) == 0); + } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java index a24d99279b3..0bcb3f8b844 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java @@ -229,6 +229,7 @@ public class TestHalfFloatPoint extends LuceneTestCase { // values that cannot be exactly represented as a half float assertEquals(HalfFloatPoint.nextUp(0f), HalfFloatPoint.nextUp(Float.MIN_VALUE), 0f); assertEquals(Float.floatToIntBits(-0f), Float.floatToIntBits(HalfFloatPoint.nextUp(-Float.MIN_VALUE))); + assertEquals(Float.floatToIntBits(0f), Float.floatToIntBits(HalfFloatPoint.nextUp(-0f))); } public void testNextDown() { @@ -239,5 +240,6 @@ public class TestHalfFloatPoint extends LuceneTestCase { // values that cannot be exactly represented as a half float assertEquals(Float.floatToIntBits(0f), Float.floatToIntBits(HalfFloatPoint.nextDown(Float.MIN_VALUE))); assertEquals(HalfFloatPoint.nextDown(-0f), 
HalfFloatPoint.nextDown(-Float.MIN_VALUE), 0f); + assertEquals(Float.floatToIntBits(-0f), Float.floatToIntBits(HalfFloatPoint.nextDown(+0f))); } } From 0d3c64ab099a1ddd168971e05e6199894fe5dfe7 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 21 Dec 2016 19:34:19 +0100 Subject: [PATCH 12/83] LUCENE-7600: Simplify DocIdMerger. --- .../lucene/codecs/DocValuesConsumer.java | 10 +- .../apache/lucene/codecs/NormsConsumer.java | 2 +- .../lucene/codecs/StoredFieldsWriter.java | 2 +- .../lucene/codecs/TermVectorsWriter.java | 2 +- .../CompressingStoredFieldsWriter.java | 2 +- .../org/apache/lucene/index/DocIDMerger.java | 225 ++++++++++-------- .../index/MappingMultiPostingsEnum.java | 2 +- .../apache/lucene/index/TestDocIDMerger.java | 4 +- 8 files changed, 136 insertions(+), 113 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index ba2f2aa501c..3d06b51a4dd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -198,7 +198,7 @@ public abstract class DocValuesConsumer implements Closeable { } } - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); final long finalCost = cost; @@ -296,7 +296,7 @@ public abstract class DocValuesConsumer implements Closeable { } } - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); final long finalCost = cost; return new BinaryDocValues() { @@ -397,7 +397,7 @@ public abstract class DocValuesConsumer implements Closeable { final long finalCost = cost; - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); return new SortedNumericDocValues() { @@ -555,7 +555,7 @@ public abstract class DocValuesConsumer implements Closeable { final long finalCost = cost; - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); return new SortedDocValues() { private int docID = -1; @@ -721,7 +721,7 @@ public abstract class DocValuesConsumer implements Closeable { subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i))); } - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); final long finalCost = cost; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java index 7ad7a7c52a4..c21fc016bc9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java @@ -130,7 +130,7 @@ public abstract class NormsConsumer implements Closeable { } } - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); return new NumericDocValues() { private int docID = -1; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java 
b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java index 80a9c498d55..0540f4f7ce8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java @@ -117,7 +117,7 @@ public abstract class StoredFieldsWriter implements Closeable { subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i])); } - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); int docCount = 0; while (true) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java index c8ad9f6099f..b84065af66a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java @@ -205,7 +205,7 @@ public abstract class TermVectorsWriter implements Closeable { subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i])); } - final DocIDMerger docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort); + final DocIDMerger docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); int docCount = 0; while (true) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java index c775e1cbf11..7ab20afd9ef 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java @@ -514,7 +514,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { } final DocIDMerger docIDMerger = - new DocIDMerger<>(subs, true); + DocIDMerger.of(subs, true); while (true) { CompressingStoredFieldsMergeSub sub = docIDMerger.next(); if (sub == null) { diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java index cd4726b23b7..dd0c9b715a2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java @@ -29,17 +29,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; * concatenated (unsorted) order, or by a specified index-time sort, skipping * deleted documents and remapping non-deleted documents. 
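* <p>Construction sketch (illustrative, not patch text; {@code subs} and {@code mergeState} are the merge-time inputs already visible in the callers above):
* <pre>
*   DocIDMerger<SubType> merger = DocIDMerger.of(subs, mergeState.needsIndexSort);
* </pre>
* i.e. callers now obtain an instance from the static {@code of} factory instead of invoking a constructor directly.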
*/ -public class DocIDMerger { - - private final List subs; - - // Used when indexSort != null: - private final PriorityQueue queue; - private boolean first; - - // Used when indexIsSorted - private T current; - private int nextIndex; +public abstract class DocIDMerger { /** Represents one sub-reader being merged */ public static abstract class Sub { @@ -58,104 +48,56 @@ public class DocIDMerger { } /** Construct this from the provided subs, specifying the maximum sub count */ - public DocIDMerger(List subs, int maxCount, boolean indexIsSorted) throws IOException { - this.subs = subs; - + public static DocIDMerger of(List subs, int maxCount, boolean indexIsSorted) throws IOException { if (indexIsSorted && maxCount > 1) { - queue = new PriorityQueue(maxCount) { - @Override - protected boolean lessThan(Sub a, Sub b) { - assert a.mappedDocID != b.mappedDocID; - return a.mappedDocID < b.mappedDocID; - } - }; + return new SortedDocIDMerger<>(subs, maxCount); } else { - // We simply concatentate - queue = null; + return new SequentialDocIDMerger<>(subs); } - - reset(); } /** Construct this from the provided subs */ - public DocIDMerger(List subs, boolean indexIsSorted) throws IOException { - this(subs, subs.size(), indexIsSorted); + public static DocIDMerger of(List subs, boolean indexIsSorted) throws IOException { + return of(subs, subs.size(), indexIsSorted); } /** Reuse API, currently only used by postings during merge */ - public void reset() throws IOException { - if (queue != null) { - // caller may not have fully consumed the queue: - queue.clear(); - for(T sub : subs) { - while (true) { - int docID = sub.nextDoc(); - if (docID == NO_MORE_DOCS) { - // all docs in this sub were deleted; do not add it to the queue! - break; - } - - int mappedDocID = sub.docMap.get(docID); - if (mappedDocID == -1) { - // doc was deleted - continue; - } else { - sub.mappedDocID = mappedDocID; - queue.add(sub); - break; - } - } - } - first = true; - } else if (subs.size() > 0) { - current = subs.get(0); - nextIndex = 1; - } else { - current = null; - nextIndex = 0; - } - } + public abstract void reset() throws IOException; /** Returns null when done */ - public T next() throws IOException { - // Loop until we find a non-deleted document - if (queue != null) { - T top = queue.top(); - if (top == null) { + public abstract T next() throws IOException; + + private DocIDMerger() {} + + private static class SequentialDocIDMerger extends DocIDMerger { + + private final List subs; + private T current; + private int nextIndex; + + private SequentialDocIDMerger(List subs) throws IOException { + this.subs = subs; + reset(); + } + + @Override + public void reset() throws IOException { + if (subs.size() > 0) { + current = subs.get(0); + nextIndex = 1; + } else { + current = null; + nextIndex = 0; + } + } + + @Override + public T next() throws IOException { + if (current == null) { // NOTE: it's annoying that caller is allowed to call us again even after we returned null before return null; } - - if (first == false) { - while (true) { - int docID = top.nextDoc(); - if (docID == NO_MORE_DOCS) { - queue.pop(); - top = queue.top(); - break; - } - int mappedDocID = top.docMap.get(docID); - if (mappedDocID == -1) { - // doc was deleted - continue; - } else { - top.mappedDocID = mappedDocID; - top = queue.updateTop(); - break; - } - } - } - - first = false; - - return top; - - } else { while (true) { - if (current == null) { - // NOTE: it's annoying that caller is allowed to call us again even after we returned null before - 
return null; - } int docID = current.nextDoc(); if (docID == NO_MORE_DOCS) { if (nextIndex == subs.size()) { @@ -166,15 +108,96 @@ public class DocIDMerger { nextIndex++; continue; } - int mappedDocID = current.docMap.get(docID); - if (mappedDocID == -1) { - // doc is deleted - continue; - } - current.mappedDocID = mappedDocID; - return current; + int mappedDocID = current.docMap.get(docID); + if (mappedDocID != -1) { + current.mappedDocID = mappedDocID; + return current; + } } } + } + + private static class SortedDocIDMerger extends DocIDMerger { + + private final List subs; + private final PriorityQueue queue; + + private SortedDocIDMerger(List subs, int maxCount) throws IOException { + this.subs = subs; + queue = new PriorityQueue(maxCount) { + @Override + protected boolean lessThan(Sub a, Sub b) { + assert a.mappedDocID != b.mappedDocID; + return a.mappedDocID < b.mappedDocID; + } + }; + reset(); + } + + @Override + public void reset() throws IOException { + // caller may not have fully consumed the queue: + queue.clear(); + boolean first = true; + for(T sub : subs) { + if (first) { + // by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue + // so the first call to next() will advance it + sub.mappedDocID = -1; + first = false; + } else { + int mappedDocID; + while (true) { + int docID = sub.nextDoc(); + if (docID == NO_MORE_DOCS) { + mappedDocID = NO_MORE_DOCS; + break; + } + mappedDocID = sub.docMap.get(docID); + if (mappedDocID != -1) { + break; + } + } + if (mappedDocID == NO_MORE_DOCS) { + // all docs in this sub were deleted; do not add it to the queue! + continue; + } + sub.mappedDocID = mappedDocID; + } + queue.add(sub); + } + } + + @Override + public T next() throws IOException { + T top = queue.top(); + if (top == null) { + // NOTE: it's annoying that caller is allowed to call us again even after we returned null before + return null; + } + + while (true) { + int docID = top.nextDoc(); + if (docID == NO_MORE_DOCS) { + queue.pop(); + top = queue.top(); + break; + } + int mappedDocID = top.docMap.get(docID); + if (mappedDocID == -1) { + // doc was deleted + continue; + } else { + top.mappedDocID = mappedDocID; + top = queue.updateTop(); + break; + } + } + + return top; + } + } + } diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java index d93c7715e93..6672d648151 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java @@ -62,7 +62,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum { for(int i=0;i(subs, allSubs.length, mergeState.needsIndexSort); + this.docIDMerger = DocIDMerger.of(subs, allSubs.length, mergeState.needsIndexSort); } MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java index d9576464fc9..5e19bbbe698 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java @@ -70,7 +70,7 @@ public class TestDocIDMerger extends LuceneTestCase { valueStart += maxDoc; } - DocIDMerger merger = new DocIDMerger<>(subs, false); + DocIDMerger merger = DocIDMerger.of(subs, false); int count = 0; while (true) { @@ -175,7 +175,7 @@ public class TestDocIDMerger 
extends LuceneTestCase { }, docMap.length, i)); } - DocIDMerger merger = new DocIDMerger<>(subs, true); + DocIDMerger merger = DocIDMerger.of(subs, true); int count = 0; while (true) { From de0a046b21b7c8698fff0db685eb287e24087b22 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 21 Dec 2016 19:21:50 +0000 Subject: [PATCH 13/83] SOLR-9758: refactor preferLocalShards implementation --- solr/CHANGES.txt | 2 + .../handler/component/HttpShardHandler.java | 48 +---------- .../component/HttpShardHandlerFactory.java | 79 +++++++++++++++++++ .../handler/component/ResponseBuilder.java | 1 - .../solr/handler/component/SearchHandler.java | 2 +- .../solr/handler/component/ShardHandler.java | 5 +- .../solr/core/MockShardHandlerFactory.java | 2 +- .../solrj/impl/CloudSolrClientTest.java | 2 +- .../solr/common/params/CommonParamsTest.java | 2 + .../TrackingShardHandlerFactory.java | 2 +- 10 files changed, 92 insertions(+), 53 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c32af127f42..d17afce4262 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -328,6 +328,8 @@ Other Changes * SOLR-9878: fixing lazy logic for retrieving ReversedWildcardFilterFactory in SolrQueryParserBase (Mikhail Khludnev) +* SOLR-9758: refactor preferLocalShards implementation (Christine Poerschke) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java index 6a55a0d917b..40e17a94052 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java @@ -15,15 +15,14 @@ * limitations under the License. */ package org.apache.solr.handler.component; + import java.lang.invoke.MethodHandles; import java.net.ConnectException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.ListIterator; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; @@ -114,51 +113,19 @@ public class HttpShardHandler extends ShardHandler { // Not thread safe... don't use in Callable. // Don't modify the returned URL list. - private List getURLs(String shard, String preferredHostAddress) { + private List getURLs(String shard) { List urls = shardToURLs.get(shard); if (urls == null) { urls = httpShardHandlerFactory.buildURLList(shard); - if (preferredHostAddress != null && urls.size() > 1) { - preferCurrentHostForDistributedReq(preferredHostAddress, urls); - } shardToURLs.put(shard, urls); } return urls; } - /** - * A distributed request is made via {@link LBHttpSolrClient} to the first live server in the URL list. - * This means it is just as likely to choose current host as any of the other hosts. - * This function makes sure that the cores of current host are always put first in the URL list. - * If all nodes prefer local-cores then a bad/heavily-loaded node will receive less requests from healthy nodes. - * This will help prevent a distributed deadlock or timeouts in all the healthy nodes due to one bad node. 
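(A client-side sketch, not part of this patch: the behavior is requested with the preferLocalShards parameter, for example from SolrJ
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CommonParams.PREFER_LOCAL_SHARDS, true);   // i.e. "preferLocalShards=true"
    QueryResponse rsp = cloudSolrClient.query(new SolrQuery("*:*").add(params));
where cloudSolrClient is an existing CloudSolrClient instance.)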
- */ - private void preferCurrentHostForDistributedReq(final String currentHostAddress, final List urls) { - if (log.isDebugEnabled()) - log.debug("Trying to prefer local shard on {} among the urls: {}", - currentHostAddress, Arrays.toString(urls.toArray())); - - ListIterator itr = urls.listIterator(); - while (itr.hasNext()) { - String url = itr.next(); - if (url.startsWith(currentHostAddress)) { - // move current URL to the fore-front - itr.remove(); - urls.add(0, url); - - if (log.isDebugEnabled()) - log.debug("Applied local shard preference for urls: {}", - Arrays.toString(urls.toArray())); - - break; - } - } - } - @Override - public void submit(final ShardRequest sreq, final String shard, final ModifiableSolrParams params, String preferredHostAddress) { + public void submit(final ShardRequest sreq, final String shard, final ModifiableSolrParams params) { // do this outside of the callable for thread safety reasons - final List urls = getURLs(shard, preferredHostAddress); + final List urls = getURLs(shard); Callable task = () -> { @@ -314,13 +281,6 @@ public class HttpShardHandler extends ShardHandler { CloudDescriptor cloudDescriptor = coreDescriptor.getCloudDescriptor(); ZkController zkController = coreDescriptor.getCoreContainer().getZkController(); - if (params.getBool(CommonParams.PREFER_LOCAL_SHARDS, false)) { - rb.preferredHostAddress = (zkController != null) ? zkController.getBaseUrl() : null; - if (rb.preferredHostAddress == null) { - log.warn("Couldn't determine current host address to prefer local shards"); - } - } - final ReplicaListTransformer replicaListTransformer = httpShardHandlerFactory.getReplicaListTransformer(req); if (shards != null) { diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index e1b743a88fa..e910443ea47 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -24,11 +24,16 @@ import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.LBHttpSolrClient; import org.apache.solr.client.solrj.impl.LBHttpSolrClient.Builder; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.URLUtil; +import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.PluginInfo; import org.apache.solr.update.UpdateShardHandlerConfig; import org.apache.solr.request.SolrQueryRequest; @@ -38,6 +43,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.Random; import java.util.concurrent.ArrayBlockingQueue; @@ -245,8 +252,80 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. return urls; } + /** + * A distributed request is made via {@link LBHttpSolrClient} to the first live server in the URL list. + * This means it is just as likely to choose current host as any of the other hosts. 
+ * This function makes sure that the cores of current host are always put first in the URL list. + * If all nodes prefer local-cores then a bad/heavily-loaded node will receive less requests from healthy nodes. + * This will help prevent a distributed deadlock or timeouts in all the healthy nodes due to one bad node. + */ + private class IsOnPreferredHostComparator implements Comparator { + final private String preferredHostAddress; + public IsOnPreferredHostComparator(String preferredHostAddress) { + this.preferredHostAddress = preferredHostAddress; + } + @Override + public int compare(Object left, Object right) { + final boolean lhs = hasPrefix(objectToString(left)); + final boolean rhs = hasPrefix(objectToString(right)); + if (lhs != rhs) { + if (lhs) { + return -1; + } else { + return +1; + } + } else { + return 0; + } + } + private String objectToString(Object o) { + final String s; + if (o instanceof String) { + s = (String)o; + } + else if (o instanceof Replica) { + s = ((Replica)o).getCoreUrl(); + } else { + s = null; + } + return s; + } + private boolean hasPrefix(String s) { + return s != null && s.startsWith(preferredHostAddress); + } + } ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req) { + final SolrParams params = req.getParams(); + + if (params.getBool(CommonParams.PREFER_LOCAL_SHARDS, false)) { + final CoreDescriptor coreDescriptor = req.getCore().getCoreDescriptor(); + final ZkController zkController = coreDescriptor.getCoreContainer().getZkController(); + final String preferredHostAddress = (zkController != null) ? zkController.getBaseUrl() : null; + if (preferredHostAddress == null) { + log.warn("Couldn't determine current host address to prefer local shards"); + } else { + return new ShufflingReplicaListTransformer(r) { + @Override + public void transform(List choices) + { + if (choices.size() > 1) { + super.transform(choices); + if (log.isDebugEnabled()) { + log.debug("Trying to prefer local shard on {} among the choices: {}", + preferredHostAddress, Arrays.toString(choices.toArray())); + } + choices.sort(new IsOnPreferredHostComparator(preferredHostAddress)); + if (log.isDebugEnabled()) { + log.debug("Applied local shard preference for choices: {}", + Arrays.toString(choices.toArray())); + } + } + } + }; + } + } + return shufflingReplicaListTransformer; } diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java index 6a378056814..e5eaff90d42 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java @@ -136,7 +136,6 @@ public class ResponseBuilder public int shards_start = -1; public List outgoing; // requests to be sent public List finished; // requests that have received responses from all shards - public String preferredHostAddress = null; public String shortCircuitedURL; public int getShardNum(String shard) { diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 166fbc6e580..ba581d43131 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -383,7 +383,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware , params.set(CommonParams.QT, reqPath); } // else if path is /select, then 
the qt gets passed thru if set } - shardHandler1.submit(sreq, shard, params, rb.preferredHostAddress); + shardHandler1.submit(sreq, shard, params); } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java index 98294247c12..4c8980659fd 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java @@ -19,10 +19,7 @@ import org.apache.solr.common.params.ModifiableSolrParams; public abstract class ShardHandler { public abstract void prepDistributed(ResponseBuilder rb); - public void submit(ShardRequest sreq, String shard, ModifiableSolrParams params) { - submit(sreq, shard, params, null); - } - public abstract void submit(ShardRequest sreq, String shard, ModifiableSolrParams params, String preferredHostAddress); + public abstract void submit(ShardRequest sreq, String shard, ModifiableSolrParams params); public abstract ShardResponse takeCompletedIncludingErrors(); public abstract ShardResponse takeCompletedOrError(); public abstract void cancelAll(); diff --git a/solr/core/src/test/org/apache/solr/core/MockShardHandlerFactory.java b/solr/core/src/test/org/apache/solr/core/MockShardHandlerFactory.java index 3f4bb1de3f1..98266b2ae84 100644 --- a/solr/core/src/test/org/apache/solr/core/MockShardHandlerFactory.java +++ b/solr/core/src/test/org/apache/solr/core/MockShardHandlerFactory.java @@ -42,7 +42,7 @@ public class MockShardHandlerFactory extends ShardHandlerFactory implements Plug @Override public void submit(ShardRequest sreq, String shard, - ModifiableSolrParams params, String preferredHostAddress) {} + ModifiableSolrParams params) {} @Override public ShardResponse takeCompletedIncludingErrors() { diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java index 5e8f6ce2915..541ffedaa41 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java @@ -349,7 +349,7 @@ public class CloudSolrClientTest extends SolrCloudTestCase { SolrQuery qRequest = new SolrQuery("*:*"); ModifiableSolrParams qParams = new ModifiableSolrParams(); - qParams.add("preferLocalShards", Boolean.toString(preferLocalShards)); + qParams.add(CommonParams.PREFER_LOCAL_SHARDS, Boolean.toString(preferLocalShards)); qParams.add(ShardParams.SHARDS_INFO, "true"); qRequest.add(qParams); diff --git a/solr/solrj/src/test/org/apache/solr/common/params/CommonParamsTest.java b/solr/solrj/src/test/org/apache/solr/common/params/CommonParamsTest.java index 73643d7a86e..65f555b16ea 100755 --- a/solr/solrj/src/test/org/apache/solr/common/params/CommonParamsTest.java +++ b/solr/solrj/src/test/org/apache/solr/common/params/CommonParamsTest.java @@ -31,4 +31,6 @@ public class CommonParamsTest extends LuceneTestCase public void testRows() { assertEquals("rows", CommonParams.ROWS); } public void testRowsDefault() { assertEquals(10, CommonParams.ROWS_DEFAULT); } + + public void testPreferLocalShards() { assertEquals("preferLocalShards", CommonParams.PREFER_LOCAL_SHARDS); } } diff --git a/solr/test-framework/src/java/org/apache/solr/handler/component/TrackingShardHandlerFactory.java b/solr/test-framework/src/java/org/apache/solr/handler/component/TrackingShardHandlerFactory.java index 95a3179bb45..8b440a2e00e 100644 --- 
a/solr/test-framework/src/java/org/apache/solr/handler/component/TrackingShardHandlerFactory.java +++ b/solr/test-framework/src/java/org/apache/solr/handler/component/TrackingShardHandlerFactory.java @@ -91,7 +91,7 @@ public class TrackingShardHandlerFactory extends HttpShardHandlerFactory { } @Override - public void submit(ShardRequest sreq, String shard, ModifiableSolrParams params, String preferredHostAddress) { + public void submit(ShardRequest sreq, String shard, ModifiableSolrParams params) { synchronized (TrackingShardHandlerFactory.this) { if (isTracking()) { queue.offer(new ShardRequestAndParams(sreq, shard, params)); From 6565a5cb2c1d07ce41a184917ca0d2e74511d622 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 21 Dec 2016 19:44:41 +0000 Subject: [PATCH 14/83] Remove unusedPrivateMember in ChaosMonkey. --- .../src/java/org/apache/solr/cloud/ChaosMonkey.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java b/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java index a803824349d..5cae35623d9 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java @@ -27,7 +27,6 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.cloud.AbstractFullDistribZkTestBase.CloudJettyRunner; import org.apache.solr.common.cloud.Replica; @@ -74,7 +73,6 @@ public class ChaosMonkey { private AtomicInteger expires = new AtomicInteger(); private AtomicInteger connloss = new AtomicInteger(); - private Map> shardToClient; private boolean expireSessions; private boolean causeConnectionLoss; private boolean aggressivelyKillLeaders; From d5e87898b1842b0a0792a3b342e9bed76bc6ee62 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Wed, 21 Dec 2016 23:07:50 +0100 Subject: [PATCH 15/83] LUCENE-6989: Fix some tests that hardcode MMapDirectory (and also the FSDirectory randomizer), to only use MMapDirectory on Windows, if it supports unmapping. Otherwise tests will fail. 
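A minimal sketch of what the new test-framework guard amounts to (the helper names appear in the diffs below; the bodies here are an assumption, not copied from the patch):

    import org.apache.lucene.store.MMapDirectory;
    import org.apache.lucene.util.Constants;

    /** True unless we are on Windows and the JVM cannot unmap memory-mapped files. */
    public static boolean hasWorkingMMapOnWindows() {
      return !Constants.WINDOWS || MMapDirectory.UNMAP_SUPPORTED;
    }

    /** Skips the calling test when MMapDirectory cannot safely be used on Windows. */
    public static void assumeWorkingMMapOnWindows() {
      assumeTrue("test requires a working MMapDirectory on Windows", hasWorkingMMapOnWindows());
    }

Tests that hardcode MMapDirectory call assumeWorkingMMapOnWindows() up front, and the FSDirectory randomizer falls back to SimpleFSDirectory when the guard is false.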
--- .../lucene/index/Test4GBStoredFields.java | 2 ++ .../apache/lucene/index/TestIndexWriter.java | 3 ++- .../apache/lucene/store/TestDirectory.java | 13 +++++++----- .../lucene/store/TestMmapDirectory.java | 3 +-- .../apache/lucene/store/TestMultiMMap.java | 3 +-- .../org/apache/lucene/util/fst/Test2BFST.java | 2 ++ .../index/BaseStoredFieldsFormatTestCase.java | 2 ++ .../apache/lucene/util/LuceneTestCase.java | 20 ++++++++++++++++--- 8 files changed, 35 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/Test4GBStoredFields.java b/lucene/core/src/test/org/apache/lucene/index/Test4GBStoredFields.java index 7e173c84522..2242a1e856d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/Test4GBStoredFields.java +++ b/lucene/core/src/test/org/apache/lucene/index/Test4GBStoredFields.java @@ -41,6 +41,8 @@ public class Test4GBStoredFields extends LuceneTestCase { @Nightly public void test() throws Exception { + assumeWorkingMMapOnWindows(); + MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields"))); dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index a99576305a1..e4f0ab0d55d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1251,8 +1251,9 @@ public class TestIndexWriter extends LuceneTestCase { public void testDeleteUnusedFiles() throws Exception { - assumeFalse("test relies on exact filenames", Codec.getDefault() instanceof SimpleTextCodec); + assumeWorkingMMapOnWindows(); + for(int iter=0;iter<2;iter++) { // relies on windows semantics Path path = createTempDir(); diff --git a/lucene/core/src/test/org/apache/lucene/store/TestDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestDirectory.java index 88f304b74fa..5e4a5934d34 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestDirectory.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestDirectory.java @@ -20,6 +20,7 @@ package org.apache.lucene.store; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -37,11 +38,13 @@ public class TestDirectory extends LuceneTestCase { largeBuffer[i] = (byte) i; // automatically loops with modulo } - final FSDirectory[] dirs = new FSDirectory[] { - new SimpleFSDirectory(path), - new NIOFSDirectory(path), - new MMapDirectory(path) - }; + final List dirs0 = new ArrayList<>(); + dirs0.add(new SimpleFSDirectory(path)); + dirs0.add(new NIOFSDirectory(path)); + if (hasWorkingMMapOnWindows()) { + dirs0.add(new MMapDirectory(path)); + } + final FSDirectory[] dirs = dirs0.stream().toArray(FSDirectory[]::new); for (int i=0; i FS_DIRECTORIES = Arrays.asList( "SimpleFSDirectory", "NIOFSDirectory", - "MMapDirectory" + // SimpleFSDirectory as replacement for MMapDirectory if unmapping is not supported on Windows (to make randomization stable): + hasWorkingMMapOnWindows() ? "MMapDirectory" : "SimpleFSDirectory" ); /** All {@link Directory} implementations. */ @@ -469,7 +483,7 @@ public abstract class LuceneTestCase extends Assert { CORE_DIRECTORIES = new ArrayList<>(FS_DIRECTORIES); CORE_DIRECTORIES.add("RAMDirectory"); } - + /** A {@link org.apache.lucene.search.QueryCachingPolicy} that randomly caches. 
*/ public static final QueryCachingPolicy MAYBE_CACHE_POLICY = new QueryCachingPolicy() { @@ -853,7 +867,7 @@ public abstract class LuceneTestCase extends Assert { public static void assumeNoException(String msg, Exception e) { RandomizedTest.assumeNoException(msg, e); } - + /** * Return args as a {@link Set} instance. The order of elements is not * preserved in iterators. From 393e36e1ce0144a412bc8ea78e98a897a0ac77dd Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Thu, 22 Dec 2016 08:17:20 +0300 Subject: [PATCH 16/83] SOLR-9699: fixing exception on core status during concurrent reload --- solr/CHANGES.txt | 2 + .../handler/admin/LukeRequestHandler.java | 18 +++- .../handler/admin/StatsReloadRaceTest.java | 84 ++++++++++++------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d17afce4262..b2ec5efb500 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -279,6 +279,8 @@ Bug Fixes * SOLR-9760: Windows script doesn't need write permission (Alex Crome by Mikhail Khludnev) +* SOLR-9699,SOLR-4668: fix exception from core status in parallel with core reload (Mikhail Khludnev) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index d0dd15252dd..d7dedf1a46d 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -58,6 +58,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -577,7 +578,7 @@ public class LukeRequestHandler extends RequestHandlerBase indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )? 
indexInfo.add("segmentCount", reader.leaves().size()); - indexInfo.add("current", reader.isCurrent() ); + indexInfo.add("current", closeSafe( reader::isCurrent)); indexInfo.add("hasDeletions", reader.hasDeletions() ); indexInfo.add("directory", dir ); IndexCommit indexCommit = reader.getIndexCommit(); @@ -593,6 +594,21 @@ public class LukeRequestHandler extends RequestHandlerBase return indexInfo; } + @FunctionalInterface + interface IOSupplier { + boolean get() throws IOException; + } + + private static Object closeSafe(IOSupplier isCurrent) { + try { + return isCurrent.get(); + }catch(AlreadyClosedException | IOException exception) { + } + return false; + } + + + private static long getFileLength(Directory dir, String filename) { try { return dir.fileLength(filename); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/StatsReloadRaceTest.java b/solr/core/src/test/org/apache/solr/handler/admin/StatsReloadRaceTest.java index 619e7d51e83..7bf493923e5 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/StatsReloadRaceTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/StatsReloadRaceTest.java @@ -18,6 +18,7 @@ package org.apache.solr.handler.admin; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; import org.apache.solr.SolrTestCaseJ4; @@ -50,11 +51,13 @@ public class StatsReloadRaceTest extends SolrTestCaseJ4 { @Test public void testParallelReloadAndStats() throws Exception { - for (int i = 0; i < atLeast(2); i++) { + Random random = random(); + + for (int i = 0; i < atLeast(random, 2); i++) { int asyncId = taskNum.incrementAndGet(); - SolrQueryResponse rsp = new SolrQueryResponse(); + h.getCoreContainer().getMultiCoreHandler().handleRequest(req( CommonParams.QT, "/admin/cores", CoreAdminParams.ACTION, @@ -64,36 +67,61 @@ public class StatsReloadRaceTest extends SolrTestCaseJ4 { boolean isCompleted; do { - String stats = h.query(req( - CommonParams.QT, "/admin/mbeans", - "stats", "true")); - - NamedList> actualStats = SolrInfoMBeanHandler.fromXML(stats).get("CORE"); - - for (Map.Entry> tuple : actualStats) { - if (tuple.getKey().contains("earcher")) { // catches "searcher" and "Searcher@345345 blah" - NamedList searcherStats = tuple.getValue(); - @SuppressWarnings("unchecked") - NamedList statsList = (NamedList)searcherStats.get("stats"); - assertEquals("expect to have exactly one indexVersion at "+statsList, 1, statsList.getAll("indexVersion").size()); - assertTrue(statsList.get("indexVersion") instanceof Long); - } + if (random.nextBoolean()) { + requestMbeans(); + } else { + requestCoreStatus(); } - h.getCoreContainer().getMultiCoreHandler().handleRequest(req( - CoreAdminParams.ACTION, - CoreAdminParams.CoreAdminAction.REQUESTSTATUS.toString(), - CoreAdminParams.REQUESTID, "" + asyncId), rsp); - - @SuppressWarnings("unchecked") - List statusLog = rsp.getValues().getAll(CoreAdminAction.STATUS.name()); - - assertFalse("expect status check w/o error, got:" + statusLog, - statusLog.contains(CoreAdminHandler.FAILED)); - - isCompleted = statusLog.contains(CoreAdminHandler.COMPLETED); + isCompleted = checkReloadComlpetion(asyncId); } while (!isCompleted); } } + private void requestCoreStatus() throws Exception { + SolrQueryResponse rsp = new SolrQueryResponse(); + h.getCoreContainer().getMultiCoreHandler().handleRequest(req( + CoreAdminParams.ACTION, + CoreAdminParams.CoreAdminAction.STATUS.toString(), + "core", DEFAULT_TEST_CORENAME), rsp); + 
assertNull(""+rsp.getException(),rsp.getException()); + + } + + private boolean checkReloadComlpetion(int asyncId) { + boolean isCompleted; + SolrQueryResponse rsp = new SolrQueryResponse(); + h.getCoreContainer().getMultiCoreHandler().handleRequest(req( + CoreAdminParams.ACTION, + CoreAdminParams.CoreAdminAction.REQUESTSTATUS.toString(), + CoreAdminParams.REQUESTID, "" + asyncId), rsp); + + @SuppressWarnings("unchecked") + List statusLog = rsp.getValues().getAll(CoreAdminAction.STATUS.name()); + + assertFalse("expect status check w/o error, got:" + statusLog, + statusLog.contains(CoreAdminHandler.FAILED)); + + isCompleted = statusLog.contains(CoreAdminHandler.COMPLETED); + return isCompleted; + } + + private void requestMbeans() throws Exception { + String stats = h.query(req( + CommonParams.QT, "/admin/mbeans", + "stats", "true")); + + NamedList> actualStats = SolrInfoMBeanHandler.fromXML(stats).get("CORE"); + + for (Map.Entry> tuple : actualStats) { + if (tuple.getKey().contains("earcher")) { // catches "searcher" and "Searcher@345345 blah" + NamedList searcherStats = tuple.getValue(); + @SuppressWarnings("unchecked") + NamedList statsList = (NamedList)searcherStats.get("stats"); + assertEquals("expect to have exactly one indexVersion at "+statsList, 1, statsList.getAll("indexVersion").size()); + assertTrue(statsList.get("indexVersion") instanceof Long); + } + } + } + } From 3366724fa0c4dfdecf568dd99c4aa8e98a9b1330 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 22 Dec 2016 09:35:46 +0000 Subject: [PATCH 17/83] Remove unused private ValueHolder class in TestGrouping. --- .../org/apache/lucene/search/grouping/TestGrouping.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index 6e2422cdc97..72c858f752c 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -1327,13 +1327,4 @@ public class TestGrouping extends LuceneTestCase { return "ShardSearcher(" + ctx.get(0).reader() + ")"; } } - - private static class ValueHolder { - - V value; - - private ValueHolder(V value) { - this.value = value; - } - } } From b73314998ebf2cd67e9c952413c13804b049e267 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 22 Dec 2016 09:37:09 +0000 Subject: [PATCH 18/83] Remove unused private AnalyzingComparator class from FreeTextSuggester. 
--- .../suggest/analyzing/FreeTextSuggester.java | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index cc4ed4b00bb..3e4845bf16b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -54,7 +54,6 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; @@ -221,50 +220,6 @@ public class FreeTextSuggester extends Lookup implements Accountable { } } - private static class AnalyzingComparator implements Comparator { - - private final ByteArrayDataInput readerA = new ByteArrayDataInput(); - private final ByteArrayDataInput readerB = new ByteArrayDataInput(); - private final BytesRef scratchA = new BytesRef(); - private final BytesRef scratchB = new BytesRef(); - - @Override - public int compare(BytesRef a, BytesRef b) { - readerA.reset(a.bytes, a.offset, a.length); - readerB.reset(b.bytes, b.offset, b.length); - - // By token: - scratchA.length = readerA.readShort(); - scratchA.bytes = a.bytes; - scratchA.offset = readerA.getPosition(); - - scratchB.bytes = b.bytes; - scratchB.length = readerB.readShort(); - scratchB.offset = readerB.getPosition(); - - int cmp = scratchA.compareTo(scratchB); - if (cmp != 0) { - return cmp; - } - readerA.skipBytes(scratchA.length); - readerB.skipBytes(scratchB.length); - - // By length (smaller surface forms sorted first): - cmp = a.length - b.length; - if (cmp != 0) { - return cmp; - } - - // By surface form: - scratchA.offset = readerA.getPosition(); - scratchA.length = a.length - scratchA.offset; - scratchB.offset = readerB.getPosition(); - scratchB.length = b.length - scratchB.offset; - - return scratchA.compareTo(scratchB); - } - } - private Analyzer addShingles(final Analyzer other) { if (grams == 1) { return other; From 30a52277dea16f4d15c40a0ec17f69076242496d Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 22 Dec 2016 19:49:12 +0100 Subject: [PATCH 19/83] LUCENE-7601: Make DocIdMerger.next() unspecified when exhausted. --- .../java/org/apache/lucene/index/DocIDMerger.java | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java index dd0c9b715a2..1ef04c25cba 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java @@ -64,7 +64,9 @@ public abstract class DocIDMerger { /** Reuse API, currently only used by postings during merge */ public abstract void reset() throws IOException; - /** Returns null when done */ + /** Returns null when done. + * NOTE: after the iterator has exhausted you should not call this + * method, as it may result in unpredicted behavior. 
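* <p>Consumption sketch (illustrative): iterate until the first {@code null} and then stop calling {@code next()}:
* <pre>
*   for (SubType sub = merger.next(); sub != null; sub = merger.next()) {
*     // use sub.mappedDocID; once null has been returned, next() must not be called again
*   }
* </pre>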
*/ public abstract T next() throws IOException; private DocIDMerger() {} @@ -93,10 +95,6 @@ public abstract class DocIDMerger { @Override public T next() throws IOException { - if (current == null) { - // NOTE: it's annoying that caller is allowed to call us again even after we returned null before - return null; - } while (true) { int docID = current.nextDoc(); if (docID == NO_MORE_DOCS) { @@ -173,10 +171,6 @@ public abstract class DocIDMerger { @Override public T next() throws IOException { T top = queue.top(); - if (top == null) { - // NOTE: it's annoying that caller is allowed to call us again even after we returned null before - return null; - } while (true) { int docID = top.nextDoc(); From c0467bb929133605fca2bc63fe1ebba758332d41 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 22 Dec 2016 15:39:17 -0500 Subject: [PATCH 20/83] LUCENE-6664: add SynonymGraphFilter for correct multi-token synonym handling --- lucene/CHANGES.txt | 8 + .../analysis/synonym/FlattenGraphFilter.java | 424 ++++ .../synonym/FlattenGraphFilterFactory.java | 44 + .../analysis/synonym/SynonymFilter.java | 4 + .../synonym/SynonymFilterFactory.java | 4 + .../analysis/synonym/SynonymGraphFilter.java | 586 +++++ .../synonym/SynonymGraphFilterFactory.java | 204 ++ .../lucene/analysis/synonym/SynonymMap.java | 7 +- .../lucene/analysis/util/CharTokenizer.java | 6 +- ...he.lucene.analysis.util.TokenFilterFactory | 2 + .../TestWordDelimiterFilter.java | 46 +- .../synonym/TestFlattenGraphFilter.java | 284 +++ .../synonym/TestSynonymGraphFilter.java | 1956 +++++++++++++++++ .../lucene/util/automaton/Automaton.java | 5 +- .../lucene/util/automaton/Operations.java | 6 +- .../lucene/util/automaton/StatePair.java | 4 +- .../analysis/BaseTokenStreamTestCase.java | 37 +- .../apache/lucene/analysis/MockTokenizer.java | 3 +- 18 files changed, 3594 insertions(+), 36 deletions(-) create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilterFactory.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterFactory.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestFlattenGraphFilter.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 912974da3fc..0099f9771d5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -78,6 +78,14 @@ New features make it simpler to execute drill down when drill sideways counts are not needed (Emmanuel Keller via Mike McCandless) +* LUCENE-6664: A new SynonymGraphFilter outputs a correct graph + structure for multi-token synonyms, separating out a + FlattenGraphFilter that is hardwired into the current + SynonymFilter. This finally makes it possible to implement + correct multi-token synonyms at search time. See + http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html + for details. 
(Mike McCandless) + Bug Fixes * LUCENE-7547: JapaneseTokenizerFactory was failing to close the diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java new file mode 100644 index 00000000000..7ede190b61d --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java @@ -0,0 +1,424 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis.synonym; + +/** + * This filter "casts" token graphs down into a "flat" form, + * for indexing. This is an inherently lossy process: nodes (positions) + * along side paths are forcefully merged. + * + *
<p>
In general this means the output graph will accept token sequences + * that the input graph did not accept, and will also fail to accept + * token sequences that the input graph did accept. + * + *
<p>
This is only necessary at indexing time because Lucene cannot yet index + * an arbitrary token graph. At search time there are better options, e.g. + * the experimental TermAutomatonQuery in sandbox. + * + * @lucene.experimental + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.RollingBuffer; + +/** + * Converts an incoming graph token stream, such as one from + * {@link SynonymGraphFilter}, into a flat form so that + * all nodes form a single linear chain with no side paths. Every + * path through the graph touches every node. + * + *
<p>
If the graph was not already flat to start, this + * is likely a lossy process, i.e. it will often cause the + * graph to accept token sequences it should not, and to + * reject token sequences it should not. + * + *
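* <p>Index-time usage sketch (assumes a prebuilt {@link SynonymMap} named {@code synonyms}; not part of the patch text):
* <pre>
*   Tokenizer tokenizer = new StandardTokenizer();
*   TokenStream stream = new SynonymGraphFilter(tokenizer, synonyms, true);
*   stream = new FlattenGraphFilter(stream); // flatten only at index time, never at query time
* </pre>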
<p>
However, when applying synonyms during indexing, this + * is necessary because Lucene already does not index a graph + * and so the indexing process is already lossy + * (it ignores the {@link PositionLengthAttribute}). + * + * @lucene.experimental + */ +public final class FlattenGraphFilter extends TokenFilter { + + /** Holds all tokens leaving a given input position. */ + private final static class InputNode implements RollingBuffer.Resettable { + private final List tokens = new ArrayList<>(); + + /** Our input node, or -1 if we haven't been assigned yet */ + int node = -1; + + /** Maximum to input node for all tokens leaving here; we use this + * to know when we can freeze. */ + int maxToNode = -1; + + /** Where we currently map to; this changes (can only + * increase as we see more input tokens), until we are finished + * with this position. */ + int outputNode = -1; + + /** Which token (index into {@link #tokens}) we will next output. */ + int nextOut; + + @Override + public void reset() { + tokens.clear(); + node = -1; + outputNode = -1; + maxToNode = -1; + nextOut = 0; + } + } + + /** Gathers up merged input positions into a single output position, + * only for the current "frontier" of nodes we've seen but can't yet + * output because they are not frozen. */ + private final static class OutputNode implements RollingBuffer.Resettable { + private final List inputNodes = new ArrayList<>(); + + /** Node ID for this output, or -1 if we haven't been assigned yet. */ + int node = -1; + + /** Which input node (index into {@link #inputNodes}) we will next output. */ + int nextOut; + + /** Start offset of tokens leaving this node. */ + int startOffset = -1; + + /** End offset of tokens arriving to this node. */ + int endOffset = -1; + + @Override + public void reset() { + inputNodes.clear(); + node = -1; + nextOut = 0; + startOffset = -1; + endOffset = -1; + } + } + + private final RollingBuffer inputNodes = new RollingBuffer() { + @Override + protected InputNode newInstance() { + return new InputNode(); + } + }; + + private final RollingBuffer outputNodes = new RollingBuffer() { + @Override + protected OutputNode newInstance() { + return new OutputNode(); + } + }; + + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + + /** Which input node the last seen token leaves from */ + private int inputFrom; + + /** We are currently releasing tokens leaving from this output node */ + private int outputFrom; + + // for debugging: + //private int retOutputFrom; + + private boolean done; + + private int lastOutputFrom; + + private int finalOffset; + + private int finalPosInc; + + private int maxLookaheadUsed; + + private int lastStartOffset; + + public FlattenGraphFilter(TokenStream in) { + super(in); + } + + private boolean releaseBufferedToken() { + + // We only need the while loop (retry) if we have a hole (an output node that has no tokens leaving): + while (outputFrom < outputNodes.getMaxPos()) { + OutputNode output = outputNodes.get(outputFrom); + if (output.inputNodes.isEmpty()) { + // No tokens arrived to this node, which happens for the first node + // after a hole: + //System.out.println(" skip empty outputFrom=" + outputFrom); + outputFrom++; + continue; + } + + int maxToNode = -1; + for(int inputNodeID : output.inputNodes) { + InputNode inputNode = 
inputNodes.get(inputNodeID); + assert inputNode.outputNode == outputFrom; + maxToNode = Math.max(maxToNode, inputNode.maxToNode); + } + //System.out.println(" release maxToNode=" + maxToNode + " vs inputFrom=" + inputFrom); + + // TODO: we could shrink the frontier here somewhat if we + // always output posLen=1 as part of our "sausagizing": + if (maxToNode <= inputFrom || done) { + //System.out.println(" output node merged these inputs: " + output.inputNodes); + // These tokens are now frozen + assert output.nextOut < output.inputNodes.size(): "output.nextOut=" + output.nextOut + " vs output.inputNodes.size()=" + output.inputNodes.size(); + InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut)); + if (done && inputNode.tokens.size() == 0 && outputFrom >= outputNodes.getMaxPos()) { + return false; + } + if (inputNode.tokens.size() == 0) { + assert inputNode.nextOut == 0; + assert output.nextOut == 0; + // Hole dest nodes should never be merged since 1) we always + // assign them to a new output position, and 2) since they never + // have arriving tokens they cannot be pushed: + assert output.inputNodes.size() == 1: output.inputNodes.size(); + outputFrom++; + inputNodes.freeBefore(output.inputNodes.get(0)); + outputNodes.freeBefore(outputFrom); + continue; + } + + assert inputNode.nextOut < inputNode.tokens.size(); + + restoreState(inputNode.tokens.get(inputNode.nextOut)); + + // Correct posInc + assert outputFrom >= lastOutputFrom; + posIncAtt.setPositionIncrement(outputFrom - lastOutputFrom); + int toInputNodeID = inputNode.node + posLenAtt.getPositionLength(); + InputNode toInputNode = inputNodes.get(toInputNodeID); + + // Correct posLen + assert toInputNode.outputNode > outputFrom; + posLenAtt.setPositionLength(toInputNode.outputNode - outputFrom); + lastOutputFrom = outputFrom; + inputNode.nextOut++; + //System.out.println(" ret " + this); + + OutputNode outputEndNode = outputNodes.get(toInputNode.outputNode); + + // Correct offsets + + // This is a bit messy; we must do this so offset don't go backwards, + // which would otherwise happen if the replacement has more tokens + // than the input: + int startOffset = Math.max(lastStartOffset, output.startOffset); + offsetAtt.setOffset(startOffset, outputEndNode.endOffset); + lastStartOffset = startOffset; + + if (inputNode.nextOut == inputNode.tokens.size()) { + output.nextOut++; + if (output.nextOut == output.inputNodes.size()) { + outputFrom++; + inputNodes.freeBefore(output.inputNodes.get(0)); + outputNodes.freeBefore(outputFrom); + } + } + + return true; + } else { + return false; + } + } + + //System.out.println(" break false"); + return false; + } + + @Override + public boolean incrementToken() throws IOException { + //System.out.println("\nF.increment inputFrom=" + inputFrom + " outputFrom=" + outputFrom); + + while (true) { + if (releaseBufferedToken()) { + //retOutputFrom += posIncAtt.getPositionIncrement(); + //System.out.println(" return buffered: " + termAtt + " " + retOutputFrom + "-" + (retOutputFrom + posLenAtt.getPositionLength())); + //printStates(); + return true; + } else if (done) { + //System.out.println(" done, return false"); + return false; + } + + if (input.incrementToken()) { + // Input node this token leaves from: + inputFrom += posIncAtt.getPositionIncrement(); + + int startOffset = offsetAtt.startOffset(); + int endOffset = offsetAtt.endOffset(); + + // Input node this token goes to: + int inputTo = inputFrom + posLenAtt.getPositionLength(); + //System.out.println(" input.inc " + termAtt + 
": " + inputFrom + "-" + inputTo); + + InputNode src = inputNodes.get(inputFrom); + if (src.node == -1) { + // This means the "from" node of this token was never seen as a "to" node, + // which should only happen if we just crossed a hole. This is a challenging + // case for us because we normally rely on the full dependencies expressed + // by the arcs to assign outgoing node IDs. It would be better if tokens + // were never dropped but instead just marked deleted with a new + // TermDeletedAttribute (boolean valued) ... but until that future, we have + // a hack here to forcefully jump the output node ID: + assert src.outputNode == -1; + src.node = inputFrom; + + src.outputNode = outputNodes.getMaxPos() + 1; + //System.out.println(" hole: force to outputNode=" + src.outputNode); + OutputNode outSrc = outputNodes.get(src.outputNode); + + // Not assigned yet: + assert outSrc.node == -1; + outSrc.node = src.outputNode; + outSrc.inputNodes.add(inputFrom); + outSrc.startOffset = startOffset; + } else { + OutputNode outSrc = outputNodes.get(src.outputNode); + if (outSrc.startOffset == -1 || startOffset > outSrc.startOffset) { + // "shrink wrap" the offsets so the original tokens (with most + // restrictive offsets) win: + outSrc.startOffset = Math.max(startOffset, outSrc.startOffset); + } + } + + // Buffer this token: + src.tokens.add(captureState()); + src.maxToNode = Math.max(src.maxToNode, inputTo); + maxLookaheadUsed = Math.max(maxLookaheadUsed, inputNodes.getBufferSize()); + + InputNode dest = inputNodes.get(inputTo); + if (dest.node == -1) { + // Common case: first time a token is arriving to this input position: + dest.node = inputTo; + } + + // Always number output nodes sequentially: + int outputEndNode = src.outputNode + 1; + + if (outputEndNode > dest.outputNode) { + if (dest.outputNode != -1) { + boolean removed = outputNodes.get(dest.outputNode).inputNodes.remove(Integer.valueOf(inputTo)); + assert removed; + } + //System.out.println(" increase output node: " + dest.outputNode + " vs " + outputEndNode); + outputNodes.get(outputEndNode).inputNodes.add(inputTo); + dest.outputNode = outputEndNode; + + // Since all we ever do is merge incoming nodes together, and then renumber + // the merged nodes sequentially, we should only ever assign smaller node + // numbers: + assert outputEndNode <= inputTo: "outputEndNode=" + outputEndNode + " vs inputTo=" + inputTo; + } + + OutputNode outDest = outputNodes.get(dest.outputNode); + // "shrink wrap" the offsets so the original tokens (with most + // restrictive offsets) win: + if (outDest.endOffset == -1 || endOffset < outDest.endOffset) { + outDest.endOffset = endOffset; + } + + } else { + //System.out.println(" got false from input"); + input.end(); + finalPosInc = posIncAtt.getPositionIncrement(); + finalOffset = offsetAtt.endOffset(); + done = true; + // Don't return false here: we need to force release any buffered tokens now + } + } + } + + // Only for debugging: + /* + private void printStates() { + System.out.println("states:"); + for(int i=outputFrom;i args) { + super(args); + if (!args.isEmpty()) { + throw new IllegalArgumentException("Unknown parameters: " + args); + } + } + + @Override + public TokenStream create(TokenStream input) { + return new FlattenGraphFilter(input); + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java index 6a72920d2f6..29f6e1c860f 100644 --- 
a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java @@ -81,6 +81,9 @@ import org.apache.lucene.util.fst.FST; * used for parsing. Subsequent tokens simply pass through * and are not parsed. A future improvement would be to * allow these tokens to also be matched.
+ * + * @deprecated Use {@link SynonymGraphFilter} instead, but be sure to also + * use {@link FlattenGraphFilter} at index time (not at search time) as well. */ // TODO: maybe we should resolve token -> wordID then run @@ -105,6 +108,7 @@ import org.apache.lucene.util.fst.FST; // // Another possible solution is described at http://www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps +@Deprecated public final class SynonymFilter extends TokenFilter { public static final String TYPE_SYNONYM = "SYNONYM"; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java index 8bab9a7eaf6..df10e9b75a9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java @@ -72,7 +72,11 @@ import org.apache.lucene.analysis.util.TokenizerFactory; *
  • {@link Analyzer} analyzer - an analyzer used for each raw synonym
  • * * @see SolrSynonymParser SolrSynonymParser: default format + * + * @deprecated Use {@link SynonymGraphFilterFactory} instead, but be sure to also + * use {@link FlattenGraphFilterFactory} at index time (not at search time) as well. */ +@Deprecated public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { private final boolean ignoreCase; private final String tokenizerFactory; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java new file mode 100644 index 00000000000..3d50e08de64 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java @@ -0,0 +1,586 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis.synonym; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.RollingBuffer; +import org.apache.lucene.util.fst.FST; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +// TODO: maybe we should resolve token -> wordID then run +// FST on wordIDs, for better perf? + +// TODO: a more efficient approach would be Aho/Corasick's +// algorithm +// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm +// It improves over the current approach here +// because it does not fully re-start matching at every +// token. For example if one pattern is "a b c x" +// and another is "b c d" and the input is "a b c d", on +// trying to parse "a b c x" but failing when you got to x, +// rather than starting over again your really should +// immediately recognize that "b c d" matches at the next +// input. I suspect this won't matter that much in +// practice, but it's possible on some set of synonyms it +// will. We'd have to modify Aho/Corasick to enforce our +// conflict resolving (eg greedy matching) because that algo +// finds all matches. This really amounts to adding a .* +// closure to the FST and then determinizing it. 
+// +// Another possible solution is described at http://www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps + +/** Applies single- or multi-token synonyms from a {@link SynonymMap} + * to an incoming {@link TokenStream}, producing a fully correct graph + * output. This is a replacement for {@link SynonymFilter}, which produces + * incorrect graphs for multi-token synonyms. + * + *
    However, if you use this during indexing, you must follow it with + * {@link FlattenGraphFilter} to squash tokens on top of one another + * like {@link SynonymFilter}, because the indexer can't directly + * consume a graph. To get fully correct positional queries when your + * synonym replacements are multiple tokens, you should instead apply + * synonyms using this {@code TokenFilter} at query time and translate + * the resulting graph to a {@code TermAutomatonQuery} e.g. using + * {@code TokenStreamToTermAutomatonQuery}. + * + *
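A minimal sketch of the index-time wiring described above (illustrative only, not part of this patch): build a small SynonymMap, apply SynonymGraphFilter, then flatten with FlattenGraphFilter so the indexer can consume the result. The class name and the example rule are invented for illustration.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.FlattenGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

import java.io.IOException;

public class SynonymGraphIndexAnalyzerExample {
  public static Analyzer newIndexAnalyzer() throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);                    // dedup=true
    // "wtf" -> "what the fudge"; multi-word sides are joined with WORD_SEPARATOR:
    builder.add(new CharsRef("wtf"),
                SynonymMap.Builder.join(new String[] {"what", "the", "fudge"}, new CharsRefBuilder()),
                true);                                                            // keepOrig=true
    final SynonymMap map = builder.build();

    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        // Produce the synonym graph, then flatten it so the indexer can consume it:
        TokenStream stream = new SynonymGraphFilter(tokenizer, map, true);        // ignoreCase=true
        stream = new FlattenGraphFilter(stream);
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
  }
}

At query time the FlattenGraphFilter step would be left out and the resulting graph handed to something like TokenStreamToTermAutomatonQuery in the sandbox module, as the paragraph above suggests.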
    NOTE: this cannot consume an incoming graph; results will + * be undefined. + * + * @lucene.experimental */ + +public final class SynonymGraphFilter extends TokenFilter { + + public static final String TYPE_SYNONYM = "SYNONYM"; + + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + + private final SynonymMap synonyms; + private final boolean ignoreCase; + + private final FST fst; + + private final FST.BytesReader fstReader; + private final FST.Arc scratchArc; + private final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + private final BytesRef scratchBytes = new BytesRef(); + private final CharsRefBuilder scratchChars = new CharsRefBuilder(); + private final LinkedList outputBuffer = new LinkedList<>(); + + private int nextNodeOut; + private int lastNodeOut; + private int maxLookaheadUsed; + + // For testing: + private int captureCount; + + private boolean liveToken; + + // Start/end offset of the current match: + private int matchStartOffset; + private int matchEndOffset; + + // True once the input TokenStream is exhausted: + private boolean finished; + + private int lookaheadNextRead; + private int lookaheadNextWrite; + + private RollingBuffer lookahead = new RollingBuffer() { + @Override + protected BufferedInputToken newInstance() { + return new BufferedInputToken(); + } + }; + + static class BufferedInputToken implements RollingBuffer.Resettable { + final CharsRefBuilder term = new CharsRefBuilder(); + AttributeSource.State state; + int startOffset = -1; + int endOffset = -1; + + @Override + public void reset() { + state = null; + term.clear(); + + // Intentionally invalid to ferret out bugs: + startOffset = -1; + endOffset = -1; + } + } + + static class BufferedOutputToken { + final String term; + + // Non-null if this was an incoming token: + final State state; + + final int startNode; + final int endNode; + + public BufferedOutputToken(State state, String term, int startNode, int endNode) { + this.state = state; + this.term = term; + this.startNode = startNode; + this.endNode = endNode; + } + } + + public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) { + super(input); + this.synonyms = synonyms; + this.fst = synonyms.fst; + if (fst == null) { + throw new IllegalArgumentException("fst must be non-null"); + } + this.fstReader = fst.getBytesReader(); + scratchArc = new FST.Arc<>(); + this.ignoreCase = ignoreCase; + } + + @Override + public boolean incrementToken() throws IOException { + //System.out.println("\nS: incrToken lastNodeOut=" + lastNodeOut + " nextNodeOut=" + nextNodeOut); + + assert lastNodeOut <= nextNodeOut; + + if (outputBuffer.isEmpty() == false) { + // We still have pending outputs from a prior synonym match: + releaseBufferedToken(); + //System.out.println(" syn: ret buffered=" + this); + assert liveToken == false; + return true; + } + + // Try to parse a new synonym match at the current token: + + if (parse()) { + // A new match was found: + releaseBufferedToken(); + //System.out.println(" syn: after parse, ret buffered=" + this); + assert liveToken == false; + return true; + } + + if (lookaheadNextRead == lookaheadNextWrite) { + + // 
Fast path: parse pulled one token, but it didn't match + // the start for any synonym, so we now return it "live" w/o having + // cloned all of its atts: + if (finished) { + //System.out.println(" syn: ret END"); + return false; + } + + assert liveToken; + liveToken = false; + + // NOTE: no need to change posInc since it's relative, i.e. whatever + // node our output is upto will just increase by the incoming posInc. + // We also don't need to change posLen, but only because we cannot + // consume a graph, so the incoming token can never span a future + // synonym match. + + } else { + // We still have buffered lookahead tokens from a previous + // parse attempt that required lookahead; just replay them now: + //System.out.println(" restore buffer"); + assert lookaheadNextRead < lookaheadNextWrite: "read=" + lookaheadNextRead + " write=" + lookaheadNextWrite; + BufferedInputToken token = lookahead.get(lookaheadNextRead); + lookaheadNextRead++; + + restoreState(token.state); + + lookahead.freeBefore(lookaheadNextRead); + + //System.out.println(" after restore offset=" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset()); + assert liveToken == false; + } + + lastNodeOut += posIncrAtt.getPositionIncrement(); + nextNodeOut = lastNodeOut + posLenAtt.getPositionLength(); + + //System.out.println(" syn: ret lookahead=" + this); + + return true; + } + + private void releaseBufferedToken() throws IOException { + //System.out.println(" releaseBufferedToken"); + + BufferedOutputToken token = outputBuffer.pollFirst(); + + if (token.state != null) { + // This is an original input token (keepOrig=true case): + //System.out.println(" hasState"); + restoreState(token.state); + //System.out.println(" startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset()); + } else { + clearAttributes(); + //System.out.println(" no state"); + termAtt.append(token.term); + + // We better have a match already: + assert matchStartOffset != -1; + + offsetAtt.setOffset(matchStartOffset, matchEndOffset); + //System.out.println(" startOffset=" + matchStartOffset + " endOffset=" + matchEndOffset); + typeAtt.setType(TYPE_SYNONYM); + } + + //System.out.println(" lastNodeOut=" + lastNodeOut); + //System.out.println(" term=" + termAtt); + + posIncrAtt.setPositionIncrement(token.startNode - lastNodeOut); + lastNodeOut = token.startNode; + posLenAtt.setPositionLength(token.endNode - token.startNode); + } + + /** Scans the next input token(s) to see if a synonym matches. Returns true + * if a match was found. 
*/ + private boolean parse() throws IOException { + // System.out.println(Thread.currentThread().getName() + ": S: parse: " + System.identityHashCode(this)); + + // Holds the longest match we've seen so far: + BytesRef matchOutput = null; + int matchInputLength = 0; + + BytesRef pendingOutput = fst.outputs.getNoOutput(); + fst.getFirstArc(scratchArc); + + assert scratchArc.output == fst.outputs.getNoOutput(); + + // How many tokens in the current match + int matchLength = 0; + boolean doFinalCapture = false; + + int lookaheadUpto = lookaheadNextRead; + matchStartOffset = -1; + + byToken: + while (true) { + //System.out.println(" cycle lookaheadUpto=" + lookaheadUpto + " maxPos=" + lookahead.getMaxPos()); + + // Pull next token's chars: + final char[] buffer; + final int bufferLen; + final int inputEndOffset; + + if (lookaheadUpto <= lookahead.getMaxPos()) { + // Still in our lookahead buffer + BufferedInputToken token = lookahead.get(lookaheadUpto); + lookaheadUpto++; + buffer = token.term.chars(); + bufferLen = token.term.length(); + inputEndOffset = token.endOffset; + //System.out.println(" use buffer now max=" + lookahead.getMaxPos()); + if (matchStartOffset == -1) { + matchStartOffset = token.startOffset; + } + } else { + + // We used up our lookahead buffer of input tokens + // -- pull next real input token: + + assert finished || liveToken == false; + + if (finished) { + //System.out.println(" break: finished"); + break; + } else if (input.incrementToken()) { + //System.out.println(" input.incrToken"); + liveToken = true; + buffer = termAtt.buffer(); + bufferLen = termAtt.length(); + if (matchStartOffset == -1) { + matchStartOffset = offsetAtt.startOffset(); + } + inputEndOffset = offsetAtt.endOffset(); + + lookaheadUpto++; + } else { + // No more input tokens + finished = true; + //System.out.println(" break: now set finished"); + break; + } + } + + matchLength++; + //System.out.println(" cycle term=" + new String(buffer, 0, bufferLen)); + + // Run each char in this token through the FST: + int bufUpto = 0; + while (bufUpto < bufferLen) { + final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen); + if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) { + break byToken; + } + + // Accum the output + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + bufUpto += Character.charCount(codePoint); + } + + assert bufUpto == bufferLen; + + // OK, entire token matched; now see if this is a final + // state in the FST (a match): + if (scratchArc.isFinal()) { + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + matchInputLength = matchLength; + matchEndOffset = inputEndOffset; + //System.out.println(" ** match"); + } + + // See if the FST can continue matching (ie, needs to + // see the next input token): + if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) { + // No further rules can match here; we're done + // searching for matching rules starting at the + // current input position. 
+ break; + } else { + // More matching is possible -- accum the output (if + // any) of the WORD_SEP arc: + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + doFinalCapture = true; + if (liveToken) { + capture(); + } + } + } + + if (doFinalCapture && liveToken && finished == false) { + // Must capture the final token if we captured any prior tokens: + capture(); + } + + if (matchOutput != null) { + + if (liveToken) { + // Single input token synonym; we must buffer it now: + capture(); + } + + // There is a match! + bufferOutputTokens(matchOutput, matchInputLength); + lookaheadNextRead += matchInputLength; + //System.out.println(" precmatch; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos()); + lookahead.freeBefore(lookaheadNextRead); + //System.out.println(" match; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos()); + return true; + } else { + //System.out.println(" no match; lookaheadNextRead=" + lookaheadNextRead); + return false; + } + + //System.out.println(" parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite); + } + + /** Expands the output graph into the necessary tokens, adding + * synonyms as side paths parallel to the input tokens, and + * buffers them in the output token buffer. */ + private void bufferOutputTokens(BytesRef bytes, int matchInputLength) { + bytesReader.reset(bytes.bytes, bytes.offset, bytes.length); + + final int code = bytesReader.readVInt(); + final boolean keepOrig = (code & 0x1) == 0; + //System.out.println(" buffer: keepOrig=" + keepOrig + " matchInputLength=" + matchInputLength); + + // How many nodes along all paths; we need this to assign the + // node ID for the final end node where all paths merge back: + int totalPathNodes; + if (keepOrig) { + assert matchInputLength > 0; + totalPathNodes = matchInputLength - 1; + } else { + totalPathNodes = 0; + } + + // How many synonyms we will insert over this match: + final int count = code >>> 1; + + // TODO: we could encode this instead into the FST: + + // 1st pass: count how many new nodes we need + List> paths = new ArrayList<>(); + for(int outputIDX=0;outputIDX path = new ArrayList<>(); + paths.add(path); + int chEnd = scratchChars.length(); + for(int chUpto=0; chUpto<=chEnd; chUpto++) { + if (chUpto == chEnd || scratchChars.charAt(chUpto) == SynonymMap.WORD_SEPARATOR) { + path.add(new String(scratchChars.chars(), lastStart, chUpto - lastStart)); + lastStart = 1 + chUpto; + } + } + + assert path.size() > 0; + totalPathNodes += path.size() - 1; + } + //System.out.println(" totalPathNodes=" + totalPathNodes); + + // 2nd pass: buffer tokens for the graph fragment + + // NOTE: totalPathNodes will be 0 in the case where the matched + // input is a single token and all outputs are also a single token + + // We "spawn" a side-path for each of the outputs for this matched + // synonym, all ending back at this end node: + + int startNode = nextNodeOut; + + int endNode = startNode + totalPathNodes + 1; + //System.out.println(" " + paths.size() + " new side-paths"); + + // First, fanout all tokens departing start node for these new side paths: + int newNodeCount = 0; + for(List path : paths) { + int pathEndNode; + //System.out.println(" path size=" + path.size()); + if (path.size() == 1) { + // Single token output, so there are no intermediate nodes: + pathEndNode = endNode; + } else { + pathEndNode = nextNodeOut + newNodeCount + 1; + newNodeCount += path.size() - 1; + } + 
outputBuffer.add(new BufferedOutputToken(null, path.get(0), startNode, pathEndNode)); + } + + // We must do the original tokens last, else the offsets "go backwards": + if (keepOrig) { + BufferedInputToken token = lookahead.get(lookaheadNextRead); + int inputEndNode; + if (matchInputLength == 1) { + // Single token matched input, so there are no intermediate nodes: + inputEndNode = endNode; + } else { + inputEndNode = nextNodeOut + newNodeCount + 1; + } + + //System.out.println(" keepOrig first token: " + token.term); + + outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), startNode, inputEndNode)); + } + + nextNodeOut = endNode; + + // Do full side-path for each syn output: + for(int pathID=0;pathID path = paths.get(pathID); + if (path.size() > 1) { + int lastNode = outputBuffer.get(pathID).endNode; + for(int i=1;i 1) { + // Do full "side path" with the original tokens: + int lastNode = outputBuffer.get(paths.size()).endNode; + for(int i=1;i + * <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" + * format="solr" ignoreCase="false" expand="true" + * tokenizerFactory="solr.WhitespaceTokenizerFactory" + * [optional tokenizer factory parameters]/> + * </analyzer> + * </fieldType> + * + *
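For the index-time/query-time split that the deprecation notes above call for, a schema configuration might look roughly like the following (an illustrative sketch, not part of this patch; the field type name is arbitrary):

<fieldType name="text_synonym_graph" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"/>
    <!-- flatten the graph at index time only; do not add this to the query analyzer -->
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"/>
  </analyzer>
</fieldType>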
    + * An optional param name prefix of "tokenizerFactory." may be used for any + * init params that the SynonymGraphFilterFactory needs to pass to the specified + * TokenizerFactory. If the TokenizerFactory expects an init parameter with + * the same name as an init param used by the SynonymGraphFilterFactory, the prefix + * is mandatory. + *
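For example (illustrative only, not part of this patch), a prefixed init param could be forwarded to a PatternTokenizerFactory used for tokenizing the raw synonym entries; the tokenizer choice and the pattern value here are assumptions, not something this patch prescribes:

<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
        tokenizerFactory="solr.PatternTokenizerFactory"
        tokenizerFactory.pattern="[,;\s]+"/>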
    + * + *
    + * The optional {@code format} parameter controls how the synonyms will be parsed: + * It supports the short names of {@code solr} for {@link SolrSynonymParser} + * and {@code wordnet} for {@link WordnetSynonymParser}, or your own + * {@code SynonymMap.Parser} class name. The default is {@code solr}. + * A custom {@link SynonymMap.Parser} is expected to have a constructor taking the following (see the sketch after this list): + *
      + *
    + *   • boolean dedup - true if duplicates should be ignored, false otherwise
    + *   • boolean expand - true if conflation groups should be expanded, false if they are one-directional
    + *   • {@link Analyzer} analyzer - an analyzer used for each raw synonym
    + *
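As a sketch of that constructor contract (not part of this patch), a custom parser might look like the following. The package, class name, and the one-rule-per-line "input|output" file format are invented for illustration; only the (boolean, boolean, Analyzer) constructor shape and the SynonymMap.Parser/Builder calls are taken from Lucene.

package com.example; // hypothetical

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRefBuilder;

public class PipeDelimitedSynonymParser extends SynonymMap.Parser {

  private final boolean expand; // accepted to satisfy the expected constructor; unused in this sketch

  public PipeDelimitedSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
    super(dedup, analyzer);
    this.expand = expand;
  }

  @Override
  public void parse(Reader in) throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(in);
    String line;
    while ((line = reader.readLine()) != null) {
      String[] sides = line.split("\\|");
      if (sides.length != 2) {
        throw new ParseException("expected exactly one '|' in: " + line, 0);
      }
      // analyze() runs the supplied Analyzer over the raw text and joins the
      // resulting tokens with SynonymMap.WORD_SEPARATOR:
      CharsRefBuilder inputRef = new CharsRefBuilder();
      CharsRefBuilder outputRef = new CharsRefBuilder();
      add(analyze(sides[0].trim(), inputRef), analyze(sides[1].trim(), outputRef), true); // includeOrig=true
    }
  }
}

Such a class would then be selected with format="com.example.PipeDelimitedSynonymParser" in the factory configuration.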
    + * @see SolrSynonymParser SolrSynonymParser: default format + * + * @lucene.experimental + */ +public class SynonymGraphFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + private final boolean ignoreCase; + private final String tokenizerFactory; + private final String synonyms; + private final String format; + private final boolean expand; + private final String analyzerName; + private final Map tokArgs = new HashMap<>(); + + private SynonymMap map; + + public SynonymGraphFilterFactory(Map args) { + super(args); + ignoreCase = getBoolean(args, "ignoreCase", false); + synonyms = require(args, "synonyms"); + format = get(args, "format"); + expand = getBoolean(args, "expand", true); + + analyzerName = get(args, "analyzer"); + tokenizerFactory = get(args, "tokenizerFactory"); + if (analyzerName != null && tokenizerFactory != null) { + throw new IllegalArgumentException("Analyzer and TokenizerFactory can't be specified both: " + + analyzerName + " and " + tokenizerFactory); + } + + if (tokenizerFactory != null) { + tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString()); + for (Iterator itr = args.keySet().iterator(); itr.hasNext();) { + String key = itr.next(); + tokArgs.put(key.replaceAll("^tokenizerFactory\\.",""), args.get(key)); + itr.remove(); + } + } + if (!args.isEmpty()) { + throw new IllegalArgumentException("Unknown parameters: " + args); + } + } + + @Override + public TokenStream create(TokenStream input) { + // if the fst is null, it means there's actually no synonyms... just return the original stream + // as there is nothing to do here. + return map.fst == null ? input : new SynonymGraphFilter(input, map, ignoreCase); + } + + @Override + public void inform(ResourceLoader loader) throws IOException { + final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); + Analyzer analyzer; + + if (analyzerName != null) { + analyzer = loadAnalyzer(loader, analyzerName); + } else { + analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer() : factory.create(); + TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer; + return new TokenStreamComponents(tokenizer, stream); + } + }; + } + + try (Analyzer a = analyzer) { + String formatClass = format; + if (format == null || format.equals("solr")) { + formatClass = SolrSynonymParser.class.getName(); + } else if (format.equals("wordnet")) { + formatClass = WordnetSynonymParser.class.getName(); + } + // TODO: expose dedup as a parameter? + map = loadSynonyms(loader, formatClass, true, a); + } catch (ParseException e) { + throw new IOException("Error parsing synonyms file:", e); + } + } + + /** + * Load synonyms with the given {@link SynonymMap.Parser} class. 
+ */ + protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException { + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + + SynonymMap.Parser parser; + Class clazz = loader.findClass(cname, SynonymMap.Parser.class); + try { + parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer); + } catch (Exception e) { + throw new RuntimeException(e); + } + + List files = splitFileNames(synonyms); + for (String file : files) { + decoder.reset(); + parser.parse(new InputStreamReader(loader.openResource(file), decoder)); + } + return parser.build(); + } + + // (there are no tests for this functionality) + private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException { + Class clazz = loader.findClass(cname, TokenizerFactory.class); + try { + TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(tokArgs); + if (tokFactory instanceof ResourceLoaderAware) { + ((ResourceLoaderAware) tokFactory).inform(loader); + } + return tokFactory; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException { + Class clazz = loader.findClass(cname, Analyzer.class); + try { + Analyzer analyzer = clazz.getConstructor().newInstance(); + if (analyzer instanceof ResourceLoaderAware) { + ((ResourceLoaderAware) analyzer).inform(loader); + } + return analyzer; + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java index fc8703f57b2..7371e235085 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java @@ -74,6 +74,11 @@ public class SynonymMap { private int maxHorizontalContext; private final boolean dedup; + /** Default constructor, passes {@code dedup=true}. */ + public Builder() { + this(true); + } + /** If dedup is true then identical rules (same input, * same output) will be added only once. */ public Builder(boolean dedup) { @@ -109,8 +114,6 @@ public class SynonymMap { reuse.setLength(upto); return reuse.get(); } - - /** only used for asserting! */ private boolean hasHoles(CharsRef chars) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java index 9100345251f..13289bee1bd 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java @@ -256,10 +256,12 @@ public abstract class CharTokenizer extends Tokenizer { } end += charCount; length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized - if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test + if (length >= MAX_WORD_LEN) { // buffer overflow! 
make sure to check for >= surrogate pair could break == test break; - } else if (length > 0) // at non-Letter w/ chars + } + } else if (length > 0) { // at non-Letter w/ chars break; // return 'em + } } termAtt.setLength(length); diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory index 70120c5221b..73986d73fec 100644 --- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory +++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory @@ -101,5 +101,7 @@ org.apache.lucene.analysis.standard.ClassicFilterFactory org.apache.lucene.analysis.standard.StandardFilterFactory org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory org.apache.lucene.analysis.synonym.SynonymFilterFactory +org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory +org.apache.lucene.analysis.synonym.FlattenGraphFilterFactory org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory org.apache.lucene.analysis.util.ElisionFilterFactory diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java index a22d9c905d7..580b17e205f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java @@ -224,18 +224,27 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { assertAnalyzesTo(a, "LUCENE / SOLR", new String[] { "LUCENE", "SOLR" }, new int[] { 0, 9 }, new int[] { 6, 13 }, - new int[] { 1, 1 }); + null, + new int[] { 1, 1 }, + null, + false); /* only in this case, posInc of 2 ?! */ assertAnalyzesTo(a, "LUCENE / solR", new String[] { "LUCENE", "sol", "solR", "R" }, new int[] { 0, 9, 9, 12 }, new int[] { 6, 12, 13, 13 }, - new int[] { 1, 1, 0, 1 }); + null, + new int[] { 1, 1, 0, 1 }, + null, + false); assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new String[] { "LUCENE", "NUTCH", "SOLR" }, new int[] { 0, 9, 15 }, new int[] { 6, 14, 19 }, - new int[] { 1, 1, 1 }); + null, + new int[] { 1, 1, 1 }, + null, + false); /* analyzer that will consume tokens with large position increments */ Analyzer a2 = new Analyzer() { @@ -252,24 +261,36 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { assertAnalyzesTo(a2, "LUCENE largegap SOLR", new String[] { "LUCENE", "largegap", "SOLR" }, new int[] { 0, 7, 16 }, new int[] { 6, 15, 20 }, - new int[] { 1, 10, 1 }); + null, + new int[] { 1, 10, 1 }, + null, + false); /* the "/" had a position increment of 10, where did it go?!?!! 
*/ assertAnalyzesTo(a2, "LUCENE / SOLR", new String[] { "LUCENE", "SOLR" }, new int[] { 0, 9 }, new int[] { 6, 13 }, - new int[] { 1, 11 }); + null, + new int[] { 1, 11 }, + null, + false); /* in this case, the increment of 10 from the "/" is carried over */ assertAnalyzesTo(a2, "LUCENE / solR", new String[] { "LUCENE", "sol", "solR", "R" }, new int[] { 0, 9, 9, 12 }, new int[] { 6, 12, 13, 13 }, - new int[] { 1, 11, 0, 1 }); + null, + new int[] { 1, 11, 0, 1 }, + null, + false); assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new String[] { "LUCENE", "NUTCH", "SOLR" }, new int[] { 0, 9, 15 }, new int[] { 6, 14, 19 }, - new int[] { 1, 11, 1 }); + null, + new int[] { 1, 11, 1 }, + null, + false); Analyzer a3 = new Analyzer() { @Override @@ -284,14 +305,21 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { new String[] { "lucene", "lucenesolr", "solr" }, new int[] { 0, 0, 7 }, new int[] { 6, 11, 11 }, - new int[] { 1, 0, 1 }); + null, + new int[] { 1, 0, 1 }, + null, + false); /* the stopword should add a gap here */ assertAnalyzesTo(a3, "the lucene.solr", new String[] { "lucene", "lucenesolr", "solr" }, new int[] { 4, 4, 11 }, new int[] { 10, 15, 15 }, - new int[] { 2, 0, 1 }); + null, + new int[] { 2, 0, 1 }, + null, + false); + IOUtils.close(a, a2, a3); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestFlattenGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestFlattenGraphFilter.java new file mode 100644 index 00000000000..d61fa96669f --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestFlattenGraphFilter.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.analysis.synonym; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; + +public class TestFlattenGraphFilter extends BaseTokenStreamTestCase { + + private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) { + final Token t = new Token(term, startOffset, endOffset); + t.setPositionIncrement(posInc); + t.setPositionLength(posLength); + return t; + } + + public void testSimpleMock() throws Exception { + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true); + TokenStream ts = new FlattenGraphFilter(tokenizer); + return new TokenStreamComponents(tokenizer, ts); + } + }; + + assertAnalyzesTo(a, "wtf happened", + new String[] {"wtf", "happened"}, + new int[] { 0, 4}, + new int[] { 3, 12}, + null, + new int[] { 1, 1}, + new int[] { 1, 1}, + true); + } + + // Make sure graph is unchanged if it's already flat + public void testAlreadyFlatten() throws Exception { + TokenStream in = new CannedTokenStream(0, 12, new Token[] { + token("wtf", 1, 1, 0, 3), + token("what", 0, 1, 0, 3), + token("wow", 0, 1, 0, 3), + token("the", 1, 1, 0, 3), + token("that's", 0, 1, 0, 3), + token("fudge", 1, 1, 0, 3), + token("funny", 0, 1, 0, 3), + token("happened", 1, 1, 4, 12) + }); + + TokenStream out = new FlattenGraphFilter(in); + + // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(out, + new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"}, + new int[] {0, 0, 0, 0, 0, 0, 0, 4}, + new int[] {3, 3, 3, 3, 3, 3, 3, 12}, + new int[] {1, 0, 0, 1, 0, 1, 0, 1}, + new int[] {1, 1, 1, 1, 1, 1, 1, 1}, + 12); + } + + public void testWTF1() throws Exception { + + // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input: + TokenStream in = new CannedTokenStream(0, 12, new Token[] { + token("wtf", 1, 5, 0, 3), + token("what", 0, 1, 0, 3), + token("wow", 0, 3, 0, 3), + token("the", 1, 1, 0, 3), + token("fudge", 1, 3, 0, 3), + token("that's", 1, 1, 0, 3), + token("funny", 1, 1, 0, 3), + token("happened", 1, 1, 4, 12) + }); + + + TokenStream out = new FlattenGraphFilter(in); + + // ... 
but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(out, + new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"}, + new int[] {0, 0, 0, 0, 0, 0, 0, 4}, + new int[] {3, 3, 3, 3, 3, 3, 3, 12}, + new int[] {1, 0, 0, 1, 0, 1, 0, 1}, + new int[] {3, 1, 1, 1, 1, 1, 1, 1}, + 12); + + } + + /** Same as testWTF1 except the "wtf" token comes out later */ + public void testWTF2() throws Exception { + + // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input: + TokenStream in = new CannedTokenStream(0, 12, new Token[] { + token("what", 1, 1, 0, 3), + token("wow", 0, 3, 0, 3), + token("wtf", 0, 5, 0, 3), + token("the", 1, 1, 0, 3), + token("fudge", 1, 3, 0, 3), + token("that's", 1, 1, 0, 3), + token("funny", 1, 1, 0, 3), + token("happened", 1, 1, 4, 12) + }); + + + TokenStream out = new FlattenGraphFilter(in); + + // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(out, + new String[] {"what", "wow", "wtf", "the", "that's", "fudge", "funny", "happened"}, + new int[] {0, 0, 0, 0, 0, 0, 0, 4}, + new int[] {3, 3, 3, 3, 3, 3, 3, 12}, + new int[] {1, 0, 0, 1, 0, 1, 0, 1}, + new int[] {1, 1, 3, 1, 1, 1, 1, 1}, + 12); + + } + + public void testNonGreedySynonyms() throws Exception { + // This is just "hypothetical" for Lucene today, because SynFilter is + // greedy: when two syn rules match on overlapping tokens, only one + // (greedily) wins. This test pretends all syn matches could match: + + TokenStream in = new CannedTokenStream(0, 20, new Token[] { + token("wizard", 1, 1, 0, 6), + token("wizard_of_oz", 0, 3, 0, 12), + token("of", 1, 1, 7, 9), + token("oz", 1, 1, 10, 12), + token("oz_screams", 0, 2, 10, 20), + token("screams", 1, 1, 13, 20), + }); + + + TokenStream out = new FlattenGraphFilter(in); + + // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(out, + new String[] {"wizard", "wizard_of_oz", "of", "oz", "oz_screams", "screams"}, + new int[] {0, 0, 7, 10, 10, 13}, + new int[] {6, 12, 9, 12, 20, 20}, + new int[] {1, 0, 1, 1, 0, 1}, + new int[] {1, 3, 1, 1, 2, 1}, + 20); + + } + + public void testNonGraph() throws Exception { + TokenStream in = new CannedTokenStream(0, 22, new Token[] { + token("hello", 1, 1, 0, 5), + token("pseudo", 1, 1, 6, 12), + token("world", 1, 1, 13, 18), + token("fun", 1, 1, 19, 22), + }); + + + TokenStream out = new FlattenGraphFilter(in); + + // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(out, + new String[] {"hello", "pseudo", "world", "fun"}, + new int[] {0, 6, 13, 19}, + new int[] {5, 12, 18, 22}, + new int[] {1, 1, 1, 1}, + new int[] {1, 1, 1, 1}, + 22); + } + + public void testSimpleHole() throws Exception { + TokenStream in = new CannedTokenStream(0, 13, new Token[] { + token("hello", 1, 1, 0, 5), + token("hole", 2, 1, 6, 10), + token("fun", 1, 1, 11, 13), + }); + + + TokenStream out = new FlattenGraphFilter(in); + + // ... 
but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(out, + new String[] {"hello", "hole", "fun"}, + new int[] {0, 6, 11}, + new int[] {5, 10, 13}, + new int[] {1, 2, 1}, + new int[] {1, 1, 1}, + 13); + } + + public void testHoleUnderSyn() throws Exception { + // Tests a StopFilter after SynFilter where a stopword in a syn is removed + // + // wizard of oz -> woz syn, but then "of" becomes a hole + + TokenStream in = new CannedTokenStream(0, 12, new Token[] { + token("wizard", 1, 1, 0, 6), + token("woz", 0, 3, 0, 12), + token("oz", 2, 1, 10, 12), + }); + + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents(out, + new String[] {"wizard", "woz", "oz"}, + new int[] {0, 0, 10}, + new int[] {6, 12, 12}, + new int[] {1, 0, 2}, + new int[] {1, 3, 1}, + 12); + } + + public void testStrangelyNumberedNodes() throws Exception { + + // Uses only nodes 0, 2, 3, i.e. 1 is just never used (it is not a hole!!) + TokenStream in = new CannedTokenStream(0, 27, new Token[] { + token("dog", 1, 3, 0, 5), + token("puppy", 0, 3, 0, 5), + token("flies", 3, 1, 6, 11), + }); + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents(out, + new String[] {"dog", "puppy", "flies"}, + new int[] {0, 0, 6}, + new int[] {5, 5, 11}, + new int[] {1, 0, 1}, + new int[] {1, 1, 1}, + 27); + } + + public void testTwoLongParallelPaths() throws Exception { + + // "a a a a a a" in parallel with "b b b b b b" + TokenStream in = new CannedTokenStream(0, 11, new Token[] { + token("a", 1, 1, 0, 1), + token("b", 0, 2, 0, 1), + token("a", 1, 2, 2, 3), + token("b", 1, 2, 2, 3), + token("a", 1, 2, 4, 5), + token("b", 1, 2, 4, 5), + token("a", 1, 2, 6, 7), + token("b", 1, 2, 6, 7), + token("a", 1, 2, 8, 9), + token("b", 1, 2, 8, 9), + token("a", 1, 2, 10, 11), + token("b", 1, 2, 10, 11), + }); + + + TokenStream out = new FlattenGraphFilter(in); + + // ... becomes flattened to a single path with overlapping a/b token between each node: + assertTokenStreamContents(out, + new String[] {"a", "b", "a", "b", "a", "b", "a", "b", "a", "b", "a", "b"}, + new int[] {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10}, + new int[] {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11}, + new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}, + new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + 11); + + } + + // NOTE: TestSynonymGraphFilter's testRandomSyns also tests FlattenGraphFilter +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java new file mode 100644 index 00000000000..edf2d2a96c5 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java @@ -0,0 +1,1956 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis.synonym; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockGraphTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.TokenStreamToAutomaton; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.IntsRefBuilder; +import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.AutomatonTestUtil; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; +import org.apache.lucene.util.automaton.Transition; +import org.apache.lucene.util.fst.Util; + +import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class TestSynonymGraphFilter extends BaseTokenStreamTestCase { + + /** Set as a side effect by {@link #getAnalyzer} and {@link #getFlattenAnalyzer}. 
*/ + private SynonymGraphFilter synFilter; + private FlattenGraphFilter flattenFilter; + + public void testBasicKeepOrigOneOutput() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b", + new String[] {"c", "x", "a", "b"}, + new int[] { 0, 2, 2, 4}, + new int[] { 1, 5, 3, 5}, + new String[] {"word", "SYNONYM", "word", "word"}, + new int[] { 1, 1, 0, 1}, + new int[] { 1, 2, 1, 1}); + a.close(); + } + + public void testMixedKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", true); + add(b, "e f", "y", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b c e f g", + new String[] {"c", "x", "a", "b", "c", "y", "g"}, + new int[] { 0, 2, 2, 4, 6, 8, 12}, + new int[] { 1, 5, 3, 5, 7, 11, 13}, + new String[] {"word", "SYNONYM", "word", "word", "word", "SYNONYM", "word"}, + new int[] { 1, 1, 0, 1, 1, 1, 1}, + new int[] { 1, 2, 1, 1, 1, 1, 1}); + a.close(); + } + + public void testNoParseAfterBuffer() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "b a", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "b b b", + new String[] {"b", "b", "b"}, + new int[] { 0, 2, 4}, + new int[] { 1, 3, 5}, + new String[] {"word", "word", "word"}, + new int[] { 1, 1, 1}, + new int[] { 1, 1, 1}); + a.close(); + } + + public void testOneInputMultipleOutputKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", true); + add(b, "a b", "y", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b c", + new String[] {"c", "x", "y", "a", "b", "c"}, + new int[] { 0, 2, 2, 2, 4, 6}, + new int[] { 1, 5, 5, 3, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, + new int[] { 1, 1, 0, 0, 1, 1, 1, 1}, + new int[] { 1, 2, 2, 1, 1, 1, 1, 1}); + a.close(); + } + + /** + * Verify type of token and positionLength after analyzer. 
+ */ + public void testPositionLengthAndTypeSimple() throws Exception { + String testFile = + "spider man, spiderman"; + + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + assertAnalyzesToPositions(analyzer, "spider man", + new String[]{"spiderman", "spider", "man"}, + new String[]{"SYNONYM", "word", "word"}, + new int[]{1, 0, 1}, + new int[]{2, 1, 1}); + } + + /** + * parse a syn file with some escaped syntax chars + */ + public void testEscapedStuff() throws Exception { + String testFile = + "a\\=>a => b\\=>b\n" + + "a\\,a => b\\,b"; + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + assertAnalyzesTo(analyzer, "ball", + new String[]{"ball"}, + new int[]{1}); + + assertAnalyzesTo(analyzer, "a=>a", + new String[]{"b=>b"}, + new int[]{1}); + + assertAnalyzesTo(analyzer, "a,a", + new String[]{"b,b"}, + new int[]{1}); + analyzer.close(); + } + + /** + * parse a syn file with bad syntax + */ + public void testInvalidAnalyzesToNothingOutput() throws Exception { + String testFile = "a => 1"; + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, false); + SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); + try { + parser.parse(new StringReader(testFile)); + fail("didn't get expected exception"); + } catch (ParseException expected) { + // expected exc + } + analyzer.close(); + } + + /** + * parse a syn file with bad syntax + */ + public void testInvalidDoubleMap() throws Exception { + String testFile = "a => b => c"; + Analyzer analyzer = new MockAnalyzer(random()); + SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); + try { + parser.parse(new StringReader(testFile)); + fail("didn't get expected exception"); + } catch (ParseException expected) { + // expected exc + } + analyzer.close(); + } + + /** + * Tests some simple examples from the solr wiki + */ + public void testSimple() throws Exception { + String testFile = + "i-pod, ipod, ipoooood\n" + + "foo => foo bar\n" + + "foo => baz\n" + + "this test, that testing"; + + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + assertAnalyzesTo(analyzer, "ball", + new String[]{"ball"}, + new int[]{1}); + + assertAnalyzesTo(analyzer, "i-pod", + new String[]{"ipod", "ipoooood", "i-pod"}, + new int[]{1, 0, 0}); + + assertAnalyzesTo(analyzer, "foo", + new String[]{"foo", "baz", "bar"}, + new int[]{1, 0, 1}); + + assertAnalyzesTo(analyzer, "this test", + new String[]{"that", "this", "testing", "test"}, + new int[]{1, 0, 1, 0}); + analyzer.close(); + } + + public void testBufferLength() throws Exception { + String testFile = + "c => 8 2 5 6 7\n" + + "f c e d f, 1\n" + + "c g a f d, 6 5 5\n" + + "e c => 4\n" + + "g => 5\n" + + "a g b f e => 5 0 7 7\n" + + "b => 1"; + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + String doc = "b c g a f b d"; + String[] expected = new String[]{"1", "8", "2", "5", "6", "7", "5", "a", "f", "1", "d"}; + assertAnalyzesTo(analyzer, doc, expected); + } + + private Analyzer solrSynsToAnalyzer(String syns) throws IOException, ParseException { + Analyzer analyzer = new MockAnalyzer(random()); + SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); + parser.parse(new StringReader(syns)); + analyzer.close(); + return getFlattenAnalyzer(parser, true); + } + + public void testMoreThanOneLookAhead() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b c d", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "a b c e", + new String[] {"a", "b", "c", "e"}, + new int[] { 0, 2, 4, 6}, + new 
int[] { 1, 3, 5, 7}, + new String[] {"word", "word", "word", "word"}, + new int[] { 1, 1, 1, 1}, + new int[] { 1, 1, 1, 1}); + a.close(); + } + + public void testLookaheadAfterParse() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "b b", "x", true); + add(b, "b", "y", true); + + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "b a b b", + new String[] {"y", "b", "a", "x", "b", "b"}, + new int[] {0, 0, 2, 4, 4, 6}, + new int[] {1, 1, 3, 7, 5, 7}, + null, + new int[] {1, 0, 1, 1, 0, 1}, + new int[] {1, 1, 1, 2, 1, 1}, + true); + } + + public void testLookaheadSecondParse() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "b b b", "x", true); + add(b, "b", "y", true); + + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "b b", + new String[] {"y", "b", "y", "b"}, + new int[] { 0, 0, 2, 2}, + new int[] { 1, 1, 3, 3}, + null, + new int[] { 1, 0, 1, 0}, + new int[] { 1, 1, 1, 1}, + true); + } + + public void testOneInputMultipleOutputNoKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", false); + add(b, "a b", "y", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b c", + new String[] {"c", "x", "y", "c"}, + new int[] { 0, 2, 2, 6}, + new int[] { 1, 5, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "word"}, + new int[] { 1, 1, 0, 1}, + new int[] { 1, 1, 1, 1}); + a.close(); + } + + public void testOneInputMultipleOutputMixedKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", true); + add(b, "a b", "y", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b c", + new String[] {"c", "x", "y", "a", "b", "c"}, + new int[] { 0, 2, 2, 2, 4, 6}, + new int[] { 1, 5, 5, 3, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, + new int[] { 1, 1, 0, 0, 1, 1, 1, 1}, + new int[] { 1, 2, 2, 1, 1, 1, 1, 1}); + a.close(); + } + + public void testSynAtEnd() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c d e a b", + new String[] {"c", "d", "e", "x", "a", "b"}, + new int[] { 0, 2, 4, 6, 6, 8}, + new int[] { 1, 3, 5, 9, 7, 9}, + new String[] {"word", "word", "word", "SYNONYM", "word", "word"}, + new int[] { 1, 1, 1, 1, 0, 1}, + new int[] { 1, 1, 1, 2, 1, 1}); + a.close(); + } + + public void testTwoSynsInARow() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a", "x", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a a b", + new String[] {"c", "x", "x", "b"}, + new int[] { 0, 2, 4, 6}, + new int[] { 1, 3, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "word"}, + new int[] { 1, 1, 1, 1}, + new int[] { 1, 1, 1, 1}); + a.close(); + } + + public void testBasicKeepOrigTwoOutputs() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x y", true); + add(b, "a b", "m n o", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b d", + new String[] {"c", "x", "m", "a", "y", "n", "o", "b", "d"}, + new int[] { 0, 2, 2, 2, 2, 2, 2, 4, 6}, + new int[] { 1, 5, 5, 3, 5, 5, 5, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, + new int[] { 1, 1, 0, 0, 1, 1, 1, 1, 1}, + new int[] { 1, 1, 2, 4, 4, 1, 2, 1, 1}); + a.close(); + } + + public void testNoCaptureIfNoMatch() throws Exception 
{ + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x y", true); + + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, + "c d d", + new String[] {"c", "d", "d"}, + new int[] { 0, 2, 4}, + new int[] { 1, 3, 5}, + new String[] {"word", "word", "word"}, + new int[] { 1, 1, 1}, + new int[] { 1, 1, 1}); + assertEquals(0, synFilter.getCaptureCount()); + a.close(); + } + + public void testBasicNotKeepOrigOneOutput() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b", + new String[] {"c", "x"}, + new int[] {0, 2}, + new int[] {1, 5}, + new String[] {"word", "SYNONYM"}, + new int[] {1, 1}, + new int[] {1, 1}); + a.close(); + } + + public void testBasicNoKeepOrigTwoOutputs() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x y", false); + add(b, "a b", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b d", + new String[] {"c", "x", "m", "y", "n", "o", "d"}, + new int[] { 0, 2, 2, 2, 2, 2, 6}, + new int[] { 1, 5, 5, 5, 5, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"}, + new int[] { 1, 1, 0, 1, 1, 1, 1}, + new int[] { 1, 1, 2, 3, 1, 1, 1}); + a.close(); + } + + public void testIgnoreCase() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x y", false); + add(b, "a b", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c A B D", + new String[] {"c", "x", "m", "y", "n", "o", "D"}, + new int[] { 0, 2, 2, 2, 2, 2, 6}, + new int[] { 1, 5, 5, 5, 5, 5, 7}, + new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"}, + new int[] { 1, 1, 0, 1, 1, 1, 1}, + new int[] { 1, 1, 2, 3, 1, 1, 1}); + a.close(); + } + + public void testDoNotIgnoreCase() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x y", false); + add(b, "a b", "m n o", false); + + Analyzer a = getAnalyzer(b, false); + assertAnalyzesTo(a, + "c A B D", + new String[] {"c", "A", "B", "D"}, + new int[] { 0, 2, 4, 6}, + new int[] { 1, 3, 5, 7}, + new String[] {"word", "word", "word", "word"}, + new int[] { 1, 1, 1, 1}, + new int[] { 1, 1, 1, 1}); + a.close(); + } + + public void testBufferedFinish1() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b c", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a b", + new String[] {"c", "a", "b"}, + new int[] { 0, 2, 4}, + new int[] { 1, 3, 5}, + new String[] {"word", "word", "word"}, + new int[] { 1, 1, 1}, + new int[] { 1, 1, 1}); + a.close(); + } + + public void testBufferedFinish2() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "m n o", false); + add(b, "d e", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "c a d", + new String[] {"c", "a", "d"}, + new int[] { 0, 2, 4}, + new int[] { 1, 3, 5}, + new String[] {"word", "word", "word"}, + new int[] { 1, 1, 1}, + new int[] { 1, 1, 1}); + a.close(); + } + + public void testCanReuse() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b", "x", true); + Analyzer a = getAnalyzer(b, true); + for(int i=0;i<10;i++) { + assertAnalyzesTo(a, + "c a b", + new String[] {"c", "x", "a", "b"}, + new int[] { 0, 2, 2, 4}, + new int[] { 1, 5, 3, 5}, + new String[] {"word", "SYNONYM", "word", "word"}, + new int[] { 1, 1, 
0, 1}, + new int[] { 1, 2, 1, 1}); + } + a.close(); + } + + /** Multiple input tokens map to a single output token */ + public void testManyToOne() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b c", "z", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "a b c d", + new String[] {"z", "a", "b", "c", "d"}, + new int[] { 0, 0, 2, 4, 6}, + new int[] { 5, 1, 3, 5, 7}, + new String[] {"SYNONYM", "word", "word", "word", "word"}, + new int[] { 1, 0, 1, 1, 1}, + new int[] { 3, 1, 1, 1, 1}); + a.close(); + } + + public void testBufferAfterMatch() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "a b c d", "x", true); + add(b, "a b", "y", false); + + // The 'c' token has to be buffered because SynGraphFilter + // needs to know whether a b c d -> x matches: + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, + "f a b c e", + new String[] {"f", "y", "c", "e"}, + new int[] { 0, 2, 6, 8}, + new int[] { 1, 5, 7, 9}, + new String[] {"word", "SYNONYM", "word", "word"}, + new int[] { 1, 1, 1, 1}, + new int[] { 1, 1, 1, 1}); + a.close(); + } + + public void testZeroSyns() throws Exception { + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("aa bb")); + try { + new SynonymGraphFilter(tokenizer, new SynonymMap.Builder(true).build(), true); + fail("did not hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + assertEquals("fst must be non-null", iae.getMessage()); + } + } + + public void testOutputHangsOffEnd() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + // b hangs off the end (no input token under it): + add(b, "a", "a b", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + assertAnalyzesTo(a, "a", + new String[] {"a", "b"}, + new int[] { 0, 0}, + new int[] { 1, 1}, + null, + new int[] { 1, 1}, + new int[] { 1, 1}, + true); + a.close(); + } + + public void testDedup() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "a b", + new String[]{"ab"}, + new int[]{1}); + a.close(); + } + + public void testNoDedup() throws Exception { + // dedup is false: + SynonymMap.Builder b = new SynonymMap.Builder(false); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "a b", + new String[]{"ab", "ab", "ab"}, + new int[]{1, 0, 0}); + a.close(); + } + + public void testMatching() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a c", "ac", keepOrig); + add(b, "a", "aa", keepOrig); + add(b, "b", "bb", keepOrig); + add(b, "z x c v", "zxcv", keepOrig); + add(b, "x c", "xc", keepOrig); + + Analyzer a = getFlattenAnalyzer(b, true); + + checkOneTerm(a, "$", "$"); + checkOneTerm(a, "a", "aa"); + checkOneTerm(a, "b", "bb"); + + assertAnalyzesTo(a, "a $", + new String[]{"aa", "$"}, + new int[]{1, 1}); + + assertAnalyzesTo(a, "$ a", + new String[]{"$", "aa"}, + new int[]{1, 1}); + + assertAnalyzesTo(a, "a a", + new String[]{"aa", "aa"}, + new int[]{1, 1}); + + assertAnalyzesTo(a, "z x c v", + new String[]{"zxcv"}, + new int[]{1}); + + 
assertAnalyzesTo(a, "z x c $", + new String[]{"z", "xc", "$"}, + new int[]{1, 1, 1}); + a.close(); + } + + public void testBasic1() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a", "foo", true); + add(b, "a b", "bar fee", true); + add(b, "b c", "dog collar", true); + add(b, "c d", "dog harness holder extras", true); + add(b, "m c e", "dog barks loudly", false); + add(b, "i j k", "feep", true); + + add(b, "e f", "foo bar", false); + add(b, "e f", "baz bee", false); + + add(b, "z", "boo", false); + add(b, "y", "bee", true); + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "a b c", + new String[] {"bar", "a", "fee", "b", "c"}, + new int[] {1, 0, 1, 0, 1}); + + assertAnalyzesTo(a, "x a b c d", + new String[] {"x", "bar", "a", "fee", "b", "dog", "c", "harness", "d", "holder", "extras"}, + new int[] {1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1}); + + assertAnalyzesTo(a, "a b a", + new String[] {"bar", "a", "fee", "b", "foo", "a"}, + new int[] {1, 0, 1, 0, 1, 0}); + + // outputs no longer add to one another: + assertAnalyzesTo(a, "c d c d", + new String[] {"dog", "c", "harness", "d", "holder", "extras", "dog", "c", "harness", "d", "holder", "extras"}, + new int[] {1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1}); + + // two outputs for same input + assertAnalyzesTo(a, "e f", + new String[] {"foo", "baz", "bar", "bee"}, + new int[] {1, 0, 1, 0}); + + // verify multi-word / single-output offsets: + assertAnalyzesTo(a, "g i j k g", + new String[] {"g", "feep", "i", "j", "k", "g"}, + new int[] {1, 1, 0, 1, 1, 1}); + + // mixed keepOrig true/false: + assertAnalyzesTo(a, "a m c e x", + new String[] {"foo", "a", "dog", "barks", "loudly", "x"}, + new int[] {1, 0, 1, 1, 1, 1}); + assertAnalyzesTo(a, "c d m c e x", + new String[] {"dog", "c", "harness", "d", "holder", "extras", "dog", "barks", "loudly","x"}, + new int[] {1, 0, 1, 0, 1, 1, 1, 1, 1, 1}); + assertTrue(synFilter.getCaptureCount() > 0); + + // no captureStates when no syns matched + assertAnalyzesTo(a, "p q r s t", + new String[] {"p", "q", "r", "s", "t"}, + new int[] {1, 1, 1, 1, 1}); + assertEquals(0, synFilter.getCaptureCount()); + + // captureStates are necessary for the single-token syn case: + assertAnalyzesTo(a, "p q z y t", + new String[] {"p", "q", "boo", "bee", "y", "t"}, + new int[] {1, 1, 1, 1, 0, 1}); + assertTrue(synFilter.getCaptureCount() > 0); + } + + public void testBasic2() throws Exception { + boolean keepOrig = true; + do { + keepOrig = !keepOrig; + + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b,"aaa", "aaaa1 aaaa2 aaaa3", keepOrig); + add(b, "bbb", "bbbb1 bbbb2", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + + if (keepOrig) { + assertAnalyzesTo(a, "xyzzy bbb pot of gold", + new String[] {"xyzzy", "bbbb1", "bbb", "bbbb2", "pot", "of", "gold"}, + new int[] {1, 1, 0, 1, 1, 1, 1}); + assertAnalyzesTo(a, "xyzzy aaa pot of gold", + new String[] {"xyzzy", "aaaa1", "aaa", "aaaa2", "aaaa2", "pot", "of", "gold"}, + new int[] {1, 1, 0, 1, 1, 1, 1, 1}); + } else { + assertAnalyzesTo(a, "xyzzy bbb pot of gold", + new String[] {"xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold"}, + new int[] {1, 1, 1, 1, 1, 1}); + assertAnalyzesTo(a, "xyzzy aaa pot of gold", + new String[] {"xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold"}, + new int[] {1, 1, 1, 1, 1, 1, 1}); + } + } while (keepOrig); + } + + /** If we expand synonyms during indexing, it's a bit better than + * SynonymFilter is today, but still necessarily has false + * positive and negative PhraseQuery matches because we 
do not + * index posLength, so we lose information. */ + public void testFlattenedGraph() throws Exception { + + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "wtf", "what the fudge", true); + + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "wtf happened", + new String[] {"what", "wtf", "the", "fudge", "happened"}, + new int[] { 0, 0, 0, 0, 4}, + new int[] { 3, 3, 3, 3, 12}, + null, + new int[] { 1, 0, 1, 1, 1}, + new int[] { 1, 3, 1, 1, 1}, + true); + + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, a); + Document doc = new Document(); + doc.add(newTextField("field", "wtf happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + // Good (this should not match, and doesn't): + assertEquals(0, s.count(new PhraseQuery("field", "what", "happened"))); + + // Bad (this should match, but doesn't): + assertEquals(0, s.count(new PhraseQuery("field", "wtf", "happened"))); + + // Good (this should match, and does): + assertEquals(1, s.count(new PhraseQuery("field", "what", "the", "fudge", "happened"))); + + // Bad (this should not match, but does): + assertEquals(1, s.count(new PhraseQuery("field", "wtf", "the"))); + + IOUtils.close(r, dir); + } + + // Needs TermAutomatonQuery, which is in sandbox still: + /* + public void testAccurateGraphQuery1() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("field", "wtf happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "what the fudge", "wtf", true); + + SynonymMap map = b.build(); + + TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); + + TokenStream in = new CannedTokenStream(0, 23, new Token[] { + token("what", 1, 1, 0, 4), + token("the", 1, 1, 5, 8), + token("fudge", 1, 1, 9, 14), + token("happened", 1, 1, 15, 23), + }); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + in = new CannedTokenStream(0, 12, new Token[] { + token("wtf", 1, 1, 0, 3), + token("happened", 1, 1, 4, 12), + }); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + // "what happened" should NOT match: + in = new CannedTokenStream(0, 13, new Token[] { + token("what", 1, 1, 0, 4), + token("happened", 1, 1, 5, 13), + }); + assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + IOUtils.close(r, dir); + } + */ + + /** If we expand synonyms at search time, the results are correct. 
*/ + // Needs TermAutomatonQuery, which is in sandbox still: + /* + public void testAccurateGraphQuery2() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("field", "say wtf happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "what the fudge", "wtf", true); + + SynonymMap map = b.build(); + + TokenStream in = new CannedTokenStream(0, 26, new Token[] { + token("say", 1, 1, 0, 3), + token("what", 1, 1, 3, 7), + token("the", 1, 1, 8, 11), + token("fudge", 1, 1, 12, 17), + token("happened", 1, 1, 18, 26), + }); + + TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + // "what happened" should NOT match: + in = new CannedTokenStream(0, 13, new Token[] { + token("what", 1, 1, 0, 4), + token("happened", 1, 1, 5, 13), + }); + assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + IOUtils.close(r, dir); + } + */ + + // Needs TermAutomatonQuery, which is in sandbox still: + /* + public void testAccurateGraphQuery3() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("field", "say what the fudge happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + SynonymMap.Builder b = new SynonymMap.Builder(); + add(b, "wtf", "what the fudge", true); + + SynonymMap map = b.build(); + + TokenStream in = new CannedTokenStream(0, 15, new Token[] { + token("say", 1, 1, 0, 3), + token("wtf", 1, 1, 3, 6), + token("happened", 1, 1, 7, 15), + }); + + TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + // "what happened" should NOT match: + in = new CannedTokenStream(0, 13, new Token[] { + token("what", 1, 1, 0, 4), + token("happened", 1, 1, 5, 13), + }); + assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + IOUtils.close(r, dir); + } + + private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) { + final Token t = new Token(term, startOffset, endOffset); + t.setPositionIncrement(posInc); + t.setPositionLength(posLength); + return t; + } + */ + + private String randomNonEmptyString() { + while(true) { + String s = TestUtil.randomUnicodeString(random()).trim(); + //String s = TestUtil.randomSimpleString(random()).trim(); + if (s.length() != 0 && s.indexOf('\u0000') == -1) { + return s; + } + } + } + + // Adds MockGraphTokenFilter after SynFilter: + public void testRandomGraphAfter() throws Exception { + final int numIters = atLeast(3); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final SynonymMap map = b.build(); + final boolean ignoreCase = random().nextBoolean(); + final boolean doFlatten = random().nextBoolean(); + + final Analyzer analyzer = new Analyzer() { + @Override + 
protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true); + TokenStream syns = new SynonymGraphFilter(tokenizer, map, ignoreCase); + TokenStream graph = new MockGraphTokenFilter(random(), syns); + if (doFlatten) { + graph = new FlattenGraphFilter(graph); + } + return new TokenStreamComponents(tokenizer, graph); + } + }; + + checkRandomData(random(), analyzer, 100); + analyzer.close(); + } + } + + public void testEmptyStringInput() throws IOException { + final int numIters = atLeast(10); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + + Analyzer analyzer = getAnalyzer(b, ignoreCase); + + checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), ""); + analyzer.close(); + } + } + + /** simple random test, doesn't verify correctness. + * does verify it doesnt throw exceptions, or that the stream doesn't misbehave + */ + public void testRandom2() throws Exception { + final int numIters = atLeast(3); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + final boolean doFlatten = random().nextBoolean(); + + Analyzer analyzer; + if (doFlatten) { + analyzer = getFlattenAnalyzer(b, ignoreCase); + } else { + analyzer = getAnalyzer(b, ignoreCase); + } + + checkRandomData(random(), analyzer, 100); + analyzer.close(); + } + } + + /** simple random test like testRandom2, but for larger docs + */ + public void testRandomHuge() throws Exception { + final int numIters = atLeast(3); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + if (VERBOSE) { + System.out.println("TEST: iter=" + i + " numEntries=" + numEntries); + } + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + final boolean doFlatten = random().nextBoolean(); + + Analyzer analyzer; + if (doFlatten) { + analyzer = getFlattenAnalyzer(b, ignoreCase); + } else { + analyzer = getAnalyzer(b, ignoreCase); + } + + checkRandomData(random(), analyzer, 100, 1024); + analyzer.close(); + } + } + + public void testEmptyTerm() throws IOException { + final int numIters = atLeast(10); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + + final Analyzer analyzer = getAnalyzer(b, ignoreCase); + + checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), ""); + analyzer.close(); + } + } + + // LUCENE-3375 + public void testVanishingTermsNoFlatten() throws Exception { + String testFile = + "aaa => aaaa1 aaaa2 aaaa3\n" + + "bbb => bbbb1 bbbb2\n"; + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + 
assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", + new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" }); + + // xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold + assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", + new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" }); + analyzer.close(); + } + + // LUCENE-3375 + public void testVanishingTermsWithFlatten() throws Exception { + String testFile = + "aaa => aaaa1 aaaa2 aaaa3\n" + + "bbb => bbbb1 bbbb2\n"; + + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", + new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" }); + + // xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold + assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", + new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" }); + analyzer.close(); + } + + public void testBuilderDedup() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "a b", + new String[] { "ab" }, + new int[] { 1 }); + a.close(); + } + + public void testBuilderNoDedup() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(false); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "a b", + new String[] { "ab", "ab", "ab" }, + new int[] { 1, 0, 0 }); + a.close(); + } + + public void testRecursion1() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "zoo", "zoo", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "zoo zoo $ zoo", + new String[] { "zoo", "zoo", "$", "zoo" }, + new int[] { 1, 1, 1, 1 }); + a.close(); + } + + public void testRecursion2() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "zoo", "zoo", keepOrig); + add(b, "zoo", "zoo zoo", keepOrig); + Analyzer a = getAnalyzer(b, true); + + // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo"); + assertAnalyzesTo(a, "zoo zoo $ zoo", + new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" }, + new int[] { 1, 0, 1, 1, 0, 1, 1, 1, 0, 1 }); + a.close(); + } + + public void testRecursion3() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add(b, "zoo zoo", "zoo", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "zoo zoo $ zoo", + new String[]{"zoo", "zoo", "zoo", "$", "zoo"}, + new int[]{1, 0, 1, 1, 1}); + a.close(); + } + + public void testRecursion4() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add(b, "zoo zoo", "zoo", keepOrig); + add(b, "zoo", "zoo zoo", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + assertAnalyzesTo(a, "zoo zoo $ zoo", + new String[]{"zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"}, + new int[]{1, 0, 1, 1, 1, 0, 1}); + a.close(); + } + + public void testKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add(b, "a b", "ab", keepOrig); + add(b, "a c", "ac", keepOrig); + add(b, "a", "aa", keepOrig); + add(b, "b", "bb", keepOrig); + 
add(b, "z x c v", "zxcv", keepOrig); + add(b, "x c", "xc", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "$", + new String[] { "$" }, + new int[] { 1 }); + assertAnalyzesTo(a, "a", + new String[] { "aa", "a" }, + new int[] { 1, 0 }); + assertAnalyzesTo(a, "a", + new String[] { "aa", "a" }, + new int[] { 1, 0 }); + assertAnalyzesTo(a, "$ a", + new String[] { "$", "aa", "a" }, + new int[] { 1, 1, 0 }); + assertAnalyzesTo(a, "a $", + new String[] { "aa", "a", "$" }, + new int[] { 1, 0, 1 }); + assertAnalyzesTo(a, "$ a !", + new String[] { "$", "aa", "a", "!" }, + new int[] { 1, 1, 0, 1 }); + assertAnalyzesTo(a, "a a", + new String[] { "aa", "a", "aa", "a" }, + new int[] { 1, 0, 1, 0 }); + assertAnalyzesTo(a, "b", + new String[] { "bb", "b" }, + new int[] { 1, 0 }); + assertAnalyzesTo(a, "z x c v", + new String[] { "zxcv", "z", "x", "c", "v" }, + new int[] { 1, 0, 1, 1, 1 }); + assertAnalyzesTo(a, "z x c $", + new String[] { "z", "xc", "x", "c", "$" }, + new int[] { 1, 1, 0, 1, 1 }); + a.close(); + } + + /** + * verify type of token and positionLengths on synonyms of different word counts, with non preserving, explicit rules. + */ + public void testNonPreservingMultiwordSynonyms() throws Exception { + String testFile = + "aaa => two words\n" + + "bbb => one two, very many multiple words\n" + + "ee ff, gg, h i j k, h i => one\n" + + "cc dd => usa,united states,u s a,united states of america"; + + Analyzer analyzer = solrSynsToAnalyzer(testFile); + + assertAnalyzesTo(analyzer, "aaa", + new String[]{"two", "words"}, + new int[]{0, 0}, + new int[]{3, 3}, + new String[]{"SYNONYM", "SYNONYM"}, + new int[]{1, 1}, + new int[]{1, 1}); + + assertAnalyzesToPositions(analyzer, "amazing aaa", + new String[]{"amazing", "two", "words"}, + new String[]{"word", "SYNONYM", "SYNONYM"}, + new int[]{1, 1, 1}, + new int[]{1, 1, 1}); + + assertAnalyzesTo(analyzer, "p bbb s", + new String[]{"p", "one", "very", "two", "many", "multiple", "words", "s"}, + new int[]{0, 2, 2, 2, 2, 2, 2, 6}, + new int[]{1, 5, 5, 5, 5, 5, 5, 7}, + new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"}, + new int[]{1, 1, 0, 1, 0, 1, 1, 1}, + new int[]{1, 1, 1, 3, 1, 1, 1, 1}); + + assertAnalyzesTo(analyzer, "p ee ff s", + new String[]{"p", "one", "s"}, + new int[]{0, 2, 8}, + new int[]{1, 7, 9}, + new String[]{"word", "SYNONYM", "word"}, + new int[]{1, 1, 1}, + new int[]{1, 1, 1}); + + assertAnalyzesTo(analyzer, "p h i j s", + new String[]{"p", "one", "j", "s"}, + new int[]{0, 2, 6, 8}, + new int[]{1, 5, 7, 9}, + new String[]{"word", "SYNONYM", "word", "word"}, + new int[]{1, 1, 1, 1}, + new int[]{1, 1, 1, 1}); + + analyzer.close(); + } + + private Analyzer getAnalyzer(SynonymMap.Builder b, final boolean ignoreCase) throws IOException { + final SynonymMap map = b.build(); + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); + // Make a local variable so testRandomHuge doesn't share it across threads! 
+ SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase); + TestSynonymGraphFilter.this.flattenFilter = null; + TestSynonymGraphFilter.this.synFilter = synFilter; + return new TokenStreamComponents(tokenizer, synFilter); + } + }; + } + + /** Appends FlattenGraphFilter too */ + private Analyzer getFlattenAnalyzer(SynonymMap.Builder b, boolean ignoreCase) throws IOException { + final SynonymMap map = b.build(); + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); + // Make a local variable so testRandomHuge doesn't share it across threads! + SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase); + FlattenGraphFilter flattenFilter = new FlattenGraphFilter(synFilter); + TestSynonymGraphFilter.this.synFilter = synFilter; + TestSynonymGraphFilter.this.flattenFilter = flattenFilter; + return new TokenStreamComponents(tokenizer, flattenFilter); + } + }; + } + + private void add(SynonymMap.Builder b, String input, String output, boolean keepOrig) { + if (VERBOSE) { + //System.out.println(" add input=" + input + " output=" + output + " keepOrig=" + keepOrig); + } + CharsRefBuilder inputCharsRef = new CharsRefBuilder(); + SynonymMap.Builder.join(input.split(" +"), inputCharsRef); + + CharsRefBuilder outputCharsRef = new CharsRefBuilder(); + SynonymMap.Builder.join(output.split(" +"), outputCharsRef); + + b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig); + } + + private char[] randomBinaryChars(int minLen, int maxLen, double bias, char base) { + int len = TestUtil.nextInt(random(), minLen, maxLen); + char[] chars = new char[len]; + for(int i=0;i 0) { + b.append(' '); + } + b.append(c); + } + return b.toString(); + } + + private static class OneSyn { + char[] in; + char[] out; + boolean keepOrig; + + @Override + public String toString() { + return toTokenString(in) + " --> " + toTokenString(out) + " (keepOrig=" + keepOrig + ")"; + } + } + + public void testRandomSyns() throws Exception { + int synCount = atLeast(10); + double bias = random().nextDouble(); + boolean dedup = random().nextBoolean(); + + boolean flatten = random().nextBoolean(); + + SynonymMap.Builder b = new SynonymMap.Builder(dedup); + List syns = new ArrayList<>(); + // Makes random syns from random a / b tokens, mapping to random x / y tokens + if (VERBOSE) { + System.out.println("TEST: make " + synCount + " syns"); + System.out.println(" bias for a over b=" + bias); + System.out.println(" dedup=" + dedup); + System.out.println(" flatten=" + flatten); + } + + int maxSynLength = 0; + + for(int i=0;i states = new HashSet<>(); + states.add(0); + Transition t = new Transition(); + for(int i=0;i nextStates = new HashSet<>(); + for(int state : states) { + int count = a.initTransition(state, t); + for(int j=0;j= t.min && digit <= t.max) { + nextStates.add(t.dest); + } + } + } + states = nextStates; + if (states.isEmpty()) { + return false; + } + } + + for(int state : states) { + if (a.isAccept(state)) { + return true; + } + } + + return false; + } + + /** Stupid, slow brute-force, yet hopefully bug-free, synonym filter. 
*/ + private Automaton slowSynFilter(String doc, List syns, boolean flatten) { + String[] tokens = doc.split(" +"); + if (VERBOSE) { + System.out.println(" doc has " + tokens.length + " tokens"); + } + int i=0; + Automaton.Builder a = new Automaton.Builder(); + int lastState = a.createState(); + while (i flatStates; + if (flatten) { + flatStates = new ArrayList<>(); + } else { + flatStates = null; + } + + if (keepOrig) { + // Add path for the original tokens + addSidePath(a, lastState, nextState, matches.get(0).in, flatStates); + } + + for(OneSyn syn : matches) { + addSidePath(a, lastState, nextState, syn.out, flatStates); + } + + i += matches.get(0).in.length; + } else { + a.addTransition(lastState, nextState, tokens[i].charAt(0)); + i++; + } + + lastState = nextState; + } + + a.setAccept(lastState, true); + + return topoSort(a.finish()); + } + + /** Just creates a side path from startState to endState with the provided tokens. */ + private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List flatStates) { + int lastState = startState; + for(int i=0;i= flatStates.size()) { + nextState = a.createState(); + if (flatStates != null) { + assert i == flatStates.size(); + flatStates.add(nextState); + } + } else { + nextState = flatStates.get(i); + } + a.addTransition(lastState, nextState, tokens[i]); + + lastState = nextState; + } + } + + private Automaton toAutomaton(TokenStream ts) throws IOException { + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + Automaton a = new Automaton(); + int srcNode = -1; + int destNode = -1; + int state = a.createState(); + while (ts.incrementToken()) { + assert termAtt.length() == 1; + char c = termAtt.charAt(0); + int posInc = posIncAtt.getPositionIncrement(); + if (posInc != 0) { + srcNode += posInc; + while (state < srcNode) { + state = a.createState(); + } + } + destNode = srcNode + posLenAtt.getPositionLength(); + while (state < destNode) { + state = a.createState(); + } + a.addTransition(srcNode, destNode, c); + } + ts.end(); + ts.close(); + a.finishState(); + a.setAccept(destNode, true); + return a; + } + + /* + private String toDot(TokenStream ts) throws IOException { + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); + ts.reset(); + int srcNode = -1; + int destNode = -1; + + StringBuilder b = new StringBuilder(); + b.append("digraph Automaton {\n"); + b.append(" rankdir = LR\n"); + b.append(" node [width=0.2, height=0.2, fontsize=8]\n"); + b.append(" initial [shape=plaintext,label=\"\"]\n"); + b.append(" initial -> 0\n"); + + while (ts.incrementToken()) { + int posInc = posIncAtt.getPositionIncrement(); + if (posInc != 0) { + srcNode += posInc; + b.append(" "); + b.append(srcNode); + b.append(" [shape=circle,label=\"" + srcNode + "\"]\n"); + } + destNode = srcNode + posLenAtt.getPositionLength(); + b.append(" "); + b.append(srcNode); + b.append(" -> "); + b.append(destNode); + b.append(" [label=\""); + b.append(termAtt); + b.append("\""); + if (typeAtt.type().equals("word") == false) { + b.append(" color=red"); + } + b.append("]\n"); + } + 
ts.end(); + ts.close(); + + b.append('}'); + return b.toString(); + } + */ + + /** Renumbers nodes according to their topo sort */ + private Automaton topoSort(Automaton in) { + int[] newToOld = Operations.topoSortStates(in); + int[] oldToNew = new int[newToOld.length]; + + Automaton.Builder a = new Automaton.Builder(); + //System.out.println("remap:"); + for(int i=0;i " + i); + if (in.isAccept(newToOld[i])) { + a.setAccept(i, true); + //System.out.println(" **"); + } + } + + Transition t = new Transition(); + for(int i=0;i>> 1; + + final int[] synonymsIdxs = new int[count]; + for (int i = 0; i < count; i++) { + synonymsIdxs[i] = bytesReader.readVInt(); + } + + BytesRef scratchBytes = new BytesRef(); + map.words.get(synonymsIdxs[2], scratchBytes); + + int synonymLength = 1; + for (int i = scratchBytes.offset; i < scratchBytes.offset + scratchBytes.length; i++) { + if (scratchBytes.bytes[i] == SynonymMap.WORD_SEPARATOR) { + synonymLength++; + } + } + + assertEquals(count, 3); + assertEquals(synonymLength, 4); + + assertAnalyzesTo(analyzer, "spider man", + new String[]{"spiderman", "spider", "man"}, + new int[]{0, 0, 7}, + new int[]{10, 6, 10}, + new String[]{"SYNONYM", "word", "word"}, + new int[]{1, 0, 1}, + new int[]{2, 1, 1}); + + assertAnalyzesToPositions(analyzer, "amazing spider man", + new String[]{"amazing", "spiderman", "spider", "man"}, + new String[]{"word", "SYNONYM", "word", "word"}, + new int[]{1, 1, 0, 1}, + new int[]{1, 2, 1, 1}); + + // System.out.println(toDot(getAnalyzer(parser, true).tokenStream("field", new StringReader("the usa is wealthy")))); + + assertAnalyzesTo(analyzer, "the united states of america is wealthy", + new String[]{"the", "usa", "united", "u", "united", "states", "s", "states", "a", "of", "america", "is", "wealthy"}, + new int[] {0, 4, 4, 4, 4, 11, 11, 11, 18, 18, 21, 29, 32}, + new int[] {3, 28, 10, 10, 10, 28, 17, 17, 28, 20, 28, 31, 39}, + new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "word", "word", "word", "word"}, + new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1}, + new int[] {1, 4, 1, 1, 1, 3, 1, 1, 2, 1, 1, 1, 1}); + + assertAnalyzesToPositions(analyzer, "spiderman", + new String[]{"spider", "spiderman", "man"}, + new String[]{"SYNONYM", "word", "SYNONYM"}, + new int[]{1, 0, 1}, + new int[]{1, 2, 1}); + + assertAnalyzesTo(analyzer, "spiderman enemies", + new String[]{"spider", "spiderman", "man", "enemies"}, + new int[]{0, 0, 0, 10}, + new int[]{9, 9, 9, 17}, + new String[]{"SYNONYM", "word", "SYNONYM", "word"}, + new int[]{1, 0, 1, 1}, + new int[]{1, 2, 1, 1}); + + assertAnalyzesTo(analyzer, "the usa is wealthy", + new String[]{"the", "united", "u", "united", "usa", "states", "s", "states", "a", "of", "america", "is", "wealthy"}, + new int[] {0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 11}, + new int[] {3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 18}, + new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, + new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1}, + new int[] {1, 1, 1, 1, 4, 3, 1, 1, 2, 1, 1, 1, 1}); + + assertAllStrings(analyzer, "the usa is wealthy", new String[] { + "the usa is wealthy", + "the united states is wealthy", + "the u s a is wealthy", + "the united states of america is wealthy", + // Wrong. Here only due to "sausagization" of the multi word synonyms. 
+ "the u states is wealthy", + "the u states a is wealthy", + "the u s of america is wealthy", + "the u states of america is wealthy", + "the united s a is wealthy", + "the united states a is wealthy", + "the united s of america is wealthy"}); + + assertAnalyzesTo(analyzer, "the united states is wealthy", + new String[]{"the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "is", "wealthy"}, + new int[] {0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21}, + new int[] {3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28}, + new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, + new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1}, + new int[] {1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1}, + false); + + assertAnalyzesTo(analyzer, "the united states of balance", + new String[]{"the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "of", "balance"}, + new int[] {0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21}, + new int[] {3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28}, + new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, + new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1}, + new int[] {1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1}); + + analyzer.close(); + } + + public void testMultiwordOffsets() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add(b, "national hockey league", "nhl", keepOrig); + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "national hockey league", + new String[]{"nhl", "national", "hockey", "league"}, + new int[]{0, 0, 9, 16}, + new int[]{22, 8, 15, 22}, + new int[]{1, 0, 1, 1}); + a.close(); + } + + public void testIncludeOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add(b, "a b", "ab", keepOrig); + add(b, "a c", "ac", keepOrig); + add(b, "a", "aa", keepOrig); + add(b, "b", "bb", keepOrig); + add(b, "z x c v", "zxcv", keepOrig); + add(b, "x c", "xc", keepOrig); + + Analyzer a = getFlattenAnalyzer(b, true); + + assertAnalyzesTo(a, "$", + new String[]{"$"}, + new int[]{1}); + assertAnalyzesTo(a, "a", + new String[]{"aa", "a"}, + new int[]{1, 0}); + assertAnalyzesTo(a, "a", + new String[]{"aa", "a"}, + new int[]{1, 0}); + assertAnalyzesTo(a, "$ a", + new String[]{"$", "aa", "a"}, + new int[]{1, 1, 0}); + assertAnalyzesTo(a, "a $", + new String[]{"aa", "a", "$"}, + new int[]{1, 0, 1}); + assertAnalyzesTo(a, "$ a !", + new String[]{"$", "aa", "a", "!"}, + new int[]{1, 1, 0, 1}); + assertAnalyzesTo(a, "a a", + new String[]{"aa", "a", "aa", "a"}, + new int[]{1, 0, 1, 0}); + assertAnalyzesTo(a, "b", + new String[]{"bb", "b"}, + new int[]{1, 0}); + assertAnalyzesTo(a, "z x c v", + new String[]{"zxcv", "z", "x", "c", "v"}, + new int[]{1, 0, 1, 1, 1}); + assertAnalyzesTo(a, "z x c $", + new String[]{"z", "xc", "x", "c", "$"}, + new int[]{1, 1, 0, 1, 1}); + a.close(); + } + + /** + * Helper method to validate all strings that can be generated from a token stream. + * Uses {@link TokenStreamToAutomaton} to create an automaton. Asserts the finite strings of the automaton are all + * and only the given valid strings. + * @param analyzer analyzer containing the SynonymFilter under test. + * @param text text to be analyzed. + * @param expectedStrings all expected finite strings. 
+   */
+  public void assertAllStrings(Analyzer analyzer, String text, String[] expectedStrings) throws IOException {
+    TokenStream tokenStream = analyzer.tokenStream("dummy", text);
+    try {
+      Automaton automaton = new TokenStreamToAutomaton().toAutomaton(tokenStream);
+      Set<IntsRef> finiteStrings = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1);
+
+      assertEquals("Invalid resulting strings count. Expected " + expectedStrings.length + " was " + finiteStrings.size(),
+          expectedStrings.length, finiteStrings.size());
+
+      Set<String> expectedStringsSet = new HashSet<>(Arrays.asList(expectedStrings));
+
+      BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder();
+      for (IntsRef ir: finiteStrings) {
+        String s = Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' ');
+        assertTrue("Unexpected string found: " + s, expectedStringsSet.contains(s));
+      }
+    } finally {
+      tokenStream.close();
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
index 0dd449c9961..e4a5bd912bd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
@@ -579,14 +579,15 @@ public class Automaton implements Accountable {
   /** Returns the dot (graphviz) representation of this automaton.
    * This is extremely useful for visualizing the automaton. */
   public String toDot() {
-    // TODO: breadth first search so we can see get layered output...
+    // TODO: breadth first search so we can get layered output...
     StringBuilder b = new StringBuilder();
     b.append("digraph Automaton {\n");
     b.append(" rankdir = LR\n");
+    b.append(" node [width=0.2, height=0.2, fontsize=8]\n");
     final int numStates = getNumStates();
     if (numStates > 0) {
-      b.append(" initial [shape=plaintext,label=\"0\"]\n");
+      b.append(" initial [shape=plaintext,label=\"\"]\n");
       b.append(" initial -> 0\n");
     }
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index eedb5336624..718a9089ce2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -370,10 +370,8 @@ final public class Operations {
   }
   /** Returns true if these two automata accept exactly the
-   * same language. This is a costly computation! Note
-   * also that a1 and a2 will be determinized as a side
-   * effect. Both automata must be determinized and have
-   * no dead states! */
+   * same language. This is a costly computation! Both automata
+   * must be determinized and have no dead states! */
   public static boolean sameLanguage(Automaton a1, Automaton a2) {
     if (a1 == a2) {
       return true;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java
index 4ce81ab35a9..7be9339914d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java
@@ -79,7 +79,9 @@ public class StatePair {
    */
   @Override
   public int hashCode() {
-    return s1 ^ s2;
+    // Don't use s1 ^ s2 since it's vulnerable to the case where s1 == s2 always --> hashCode = 0, e.g.
if you call Operations.sameLanguage, + // passing the same automaton against itself: + return s1 * 31 + s2; } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index 5fd2fef189c..924756e5ce0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -185,22 +185,22 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { assertEquals("term "+i, output[i], termAtt.toString()); if (startOffsets != null) { - assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset()); + assertEquals("startOffset " + i + " term=" + termAtt, startOffsets[i], offsetAtt.startOffset()); } if (endOffsets != null) { - assertEquals("endOffset "+i, endOffsets[i], offsetAtt.endOffset()); + assertEquals("endOffset " + i + " term=" + termAtt, endOffsets[i], offsetAtt.endOffset()); } if (types != null) { - assertEquals("type "+i, types[i], typeAtt.type()); + assertEquals("type " + i + " term=" + termAtt, types[i], typeAtt.type()); } if (posIncrements != null) { - assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement()); + assertEquals("posIncrement " + i + " term=" + termAtt, posIncrements[i], posIncrAtt.getPositionIncrement()); } if (posLengths != null) { - assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength()); + assertEquals("posLength " + i + " term=" + termAtt, posLengths[i], posLengthAtt.getPositionLength()); } if (keywordAtts != null) { - assertEquals("keywordAtt " + i, keywordAtts[i], keywordAtt.isKeyword()); + assertEquals("keywordAtt " + i + " term=" + termAtt, keywordAtts[i], keywordAtt.isKeyword()); } // we can enforce some basic things about a few attributes even if the caller doesn't check: @@ -208,13 +208,13 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { final int startOffset = offsetAtt.startOffset(); final int endOffset = offsetAtt.endOffset(); if (finalOffset != null) { - assertTrue("startOffset must be <= finalOffset", startOffset <= finalOffset.intValue()); - assertTrue("endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + finalOffset.intValue(), + assertTrue("startOffset (= " + startOffset + ") must be <= finalOffset (= " + finalOffset + ") term=" + termAtt, startOffset <= finalOffset.intValue()); + assertTrue("endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + finalOffset.intValue() + " term=" + termAtt, endOffset <= finalOffset.intValue()); } if (offsetsAreCorrect) { - assertTrue("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset, offsetAtt.startOffset() >= lastStartOffset); + assertTrue("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " term=" + termAtt, offsetAtt.startOffset() >= lastStartOffset); lastStartOffset = offsetAtt.startOffset(); } @@ -236,7 +236,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { // We've seen a token leaving from this position // before; verify the startOffset is the same: //System.out.println(" + vs " + pos + " -> " + startOffset); - assertEquals("pos=" + pos + " posLen=" + posLength + " token=" + termAtt, posToStartOffset.get(pos).intValue(), startOffset); + assertEquals(i + " 
inconsistent startOffset: pos=" + pos + " posLen=" + posLength + " token=" + termAtt, posToStartOffset.get(pos).intValue(), startOffset); } final int endPos = pos + posLength; @@ -249,7 +249,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { // We've seen a token arriving to this position // before; verify the endOffset is the same: //System.out.println(" + ve " + endPos + " -> " + endOffset); - assertEquals("pos=" + pos + " posLen=" + posLength + " token=" + termAtt, posToEndOffset.get(endPos).intValue(), endOffset); + assertEquals("inconsistent endOffset " + i + " pos=" + pos + " posLen=" + posLength + " token=" + termAtt, posToEndOffset.get(endPos).intValue(), endOffset); } } } @@ -351,16 +351,19 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { checkResetException(a, input); + checkAnalysisConsistency(random(), a, true, input); assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException { checkResetException(a, input); + checkAnalysisConsistency(random(), a, true, input); assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length()); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException { checkResetException(a, input); + checkAnalysisConsistency(random(), a, true, input, offsetsAreCorrect); assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect); } @@ -379,6 +382,10 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, int[] posIncrements, int[] posLengths) throws IOException { assertAnalyzesTo(a, input, output, null, null, null, posIncrements, posLengths); } + + public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, String[] types, int[] posIncrements, int[] posLengths) throws IOException { + assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths); + } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException { assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null, null); @@ -599,7 +606,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { try { for (int i = 0; i < iterations; i++) { String text; - + if (random.nextInt(10) == 7) { // real data from linedocs text = docs.nextDoc().get("body"); @@ -623,7 +630,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { // synthetic text = TestUtil.randomAnalysisString(random, maxWordLength, simple); } - + try { checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField); if (iw != null) { @@ -769,7 +776,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { } catch 
(IllegalStateException ise) { // Catch & ignore MockTokenizer's // anger... - if ("end() called before incrementToken() returned false!".equals(ise.getMessage())) { + if (ise.getMessage().contains("end() called in wrong state=")) { // OK } else { throw ise; @@ -794,7 +801,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { } catch (IllegalStateException ise) { // Catch & ignore MockTokenizer's // anger... - if ("end() called before incrementToken() returned false!".equals(ise.getMessage())) { + if (ise.getMessage().contains("end() called in wrong state=")) { // OK } else { throw ise; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java index 62567219e4c..76b71c122e7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java @@ -103,6 +103,7 @@ public class MockTokenizer extends Tokenizer { public MockTokenizer(CharacterRunAutomaton runAutomaton, boolean lowerCase) { this(runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH); } + /** Calls {@link #MockTokenizer(CharacterRunAutomaton, boolean) MockTokenizer(Reader, WHITESPACE, true)} */ public MockTokenizer() { this(WHITESPACE, true); @@ -316,7 +317,7 @@ public class MockTokenizer extends Tokenizer { // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false. // these tests should disable this check (in general you should consume the entire stream) if (streamState != State.INCREMENT_FALSE) { - fail("end() called before incrementToken() returned false!"); + fail("end() called in wrong state=" + streamState + "!"); } } finally { streamState = State.END; From fcde04ac733f3e9fc73e140f5f73d0f0af239019 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 22 Dec 2016 17:23:42 -0500 Subject: [PATCH 21/83] WDF creates broken offsets --- .../miscellaneous/TestWordDelimiterFilter.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java index 580b17e205f..7f35298b9ed 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java @@ -340,7 +340,10 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { new String[] { "abc", "abcdef", "abcdef123456", "def", "123", "123456", "456" }, new int[] { 0, 0, 0, 4, 8, 8, 12 }, new int[] { 3, 7, 15, 7, 11, 15, 15 }, - new int[] { 1, 0, 0, 1, 1, 0, 1 }); + null, + new int[] { 1, 0, 0, 1, 1, 0, 1 }, + null, + false); a.close(); } @@ -361,7 +364,10 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { new String[] { "abc-def-123-456", "abc", "abcdef", "abcdef123456", "def", "123", "123456", "456" }, new int[] { 0, 0, 0, 0, 4, 8, 8, 12 }, new int[] { 15, 3, 7, 15, 7, 11, 15, 15 }, - new int[] { 1, 0, 0, 0, 1, 1, 0, 1 }); + null, + new int[] { 1, 0, 0, 0, 1, 1, 0, 1 }, + null, + false); a.close(); } From f7ea2ae85db39f12709c3341d57efa28a56bf976 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 23 Dec 2016 12:14:57 +0000 Subject: [PATCH 22/83] LUCENE-7530: extend/add -validate-source-patterns checks 
for .xml/.template files --- build.xml | 40 +++++++++++------- .../lucene/analysis/common/pom.xml.template | 36 ++++++++-------- .../lucene/analysis/icu/pom.xml.template | 42 +++++++++---------- .../lucene/analysis/kuromoji/pom.xml.template | 38 ++++++++--------- .../analysis/morfologik/pom.xml.template | 38 ++++++++--------- .../lucene/analysis/phonetic/pom.xml.template | 38 ++++++++--------- .../maven/lucene/analysis/pom.xml.template | 36 ++++++++-------- .../lucene/analysis/smartcn/pom.xml.template | 36 ++++++++-------- .../lucene/analysis/stempel/pom.xml.template | 36 ++++++++-------- .../lucene/analysis/uima/pom.xml.template | 36 ++++++++-------- .../lucene/backward-codecs/pom.xml.template | 36 ++++++++-------- .../maven/lucene/benchmark/pom.xml.template | 36 ++++++++-------- .../lucene/classification/pom.xml.template | 36 ++++++++-------- .../maven/lucene/codecs/pom.xml.template | 36 ++++++++-------- .../lucene/codecs/src/java/pom.xml.template | 36 ++++++++-------- .../lucene/codecs/src/test/pom.xml.template | 36 ++++++++-------- dev-tools/maven/lucene/core/pom.xml.template | 36 ++++++++-------- .../lucene/core/src/java/pom.xml.template | 36 ++++++++-------- .../lucene/core/src/test/pom.xml.template | 36 ++++++++-------- dev-tools/maven/lucene/demo/pom.xml.template | 36 ++++++++-------- .../maven/lucene/expressions/pom.xml.template | 36 ++++++++-------- dev-tools/maven/lucene/facet/pom.xml.template | 36 ++++++++-------- .../maven/lucene/grouping/pom.xml.template | 36 ++++++++-------- .../maven/lucene/highlighter/pom.xml.template | 36 ++++++++-------- dev-tools/maven/lucene/join/pom.xml.template | 36 ++++++++-------- .../maven/lucene/memory/pom.xml.template | 36 ++++++++-------- dev-tools/maven/lucene/misc/pom.xml.template | 36 ++++++++-------- dev-tools/maven/lucene/pom.xml.template | 36 ++++++++-------- .../maven/lucene/queries/pom.xml.template | 36 ++++++++-------- .../maven/lucene/queryparser/pom.xml.template | 36 ++++++++-------- .../maven/lucene/replicator/pom.xml.template | 38 ++++++++--------- .../maven/lucene/sandbox/pom.xml.template | 36 ++++++++-------- .../lucene/spatial-extras/pom.xml.template | 36 ++++++++-------- .../maven/lucene/spatial/pom.xml.template | 38 ++++++++--------- .../maven/lucene/spatial3d/pom.xml.template | 36 ++++++++-------- .../maven/lucene/suggest/pom.xml.template | 36 ++++++++-------- .../lucene/test-framework/pom.xml.template | 36 ++++++++-------- dev-tools/maven/pom.xml.template | 36 ++++++++-------- .../contrib/analysis-extras/pom.xml.template | 36 ++++++++-------- .../solr/contrib/analytics/pom.xml.template | 36 ++++++++-------- .../solr/contrib/clustering/pom.xml.template | 36 ++++++++-------- .../dataimporthandler-extras/pom.xml.template | 36 ++++++++-------- .../dataimporthandler/pom.xml.template | 36 ++++++++-------- .../solr/contrib/extraction/pom.xml.template | 36 ++++++++-------- .../solr/contrib/langid/pom.xml.template | 36 ++++++++-------- .../maven/solr/contrib/ltr/pom.xml.template | 36 ++++++++-------- .../solr/contrib/map-reduce/pom.xml.template | 36 ++++++++-------- .../contrib/morphlines-cell/pom.xml.template | 36 ++++++++-------- .../contrib/morphlines-core/pom.xml.template | 36 ++++++++-------- dev-tools/maven/solr/contrib/pom.xml.template | 36 ++++++++-------- .../maven/solr/contrib/uima/pom.xml.template | 36 ++++++++-------- .../solr/contrib/velocity/pom.xml.template | 36 ++++++++-------- dev-tools/maven/solr/core/pom.xml.template | 36 ++++++++-------- .../maven/solr/core/src/java/pom.xml.template | 36 ++++++++-------- 
 .../maven/solr/core/src/test/pom.xml.template | 36 ++++++++--------
 dev-tools/maven/solr/pom.xml.template | 36 ++++++++--------
 dev-tools/maven/solr/solrj/pom.xml.template | 36 ++++++++--------
 .../solr/solrj/src/java/pom.xml.template | 36 ++++++++--------
 .../solr/solrj/src/test/pom.xml.template | 36 ++++++++--------
 .../solr/test-framework/pom.xml.template | 36 ++++++++--------
 60 files changed, 1095 insertions(+), 1085 deletions(-)

diff --git a/build.xml b/build.xml
index 0d25615d3af..c06273c83b1 100644
--- a/build.xml
+++ b/build.xml
@@ -138,6 +138,7 @@
       'java', 'jflex', 'py', 'pl', 'g4', 'jj', 'html', 'js', 'css', 'xml', 'xsl', 'vm', 'sh', 'cmd', 'bat', 'policy', 'properties', 'mdtext',
+      'template',
       ];
       def invalidPatterns = [
         (~$/@author\b/$) : '@author javadoc tag',
@@ -166,17 +167,36 @@
       }
       def javadocsPattern = ~$/(?sm)^\Q/**\E(.*?)\Q*/\E/$;
-      def commentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$;
+      def javaCommentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$;
+      def xmlCommentPattern = ~$/(?sm)\Q<!--\E(.*?)\Q-->\E/$;
       def lineSplitter = ~$/[\r\n]+/$;
       def licenseMatcher = Defaults.createDefaultMatcher();
       def validLoggerPattern = ~$/(?s)\b(private\s|static\s|final\s){3}+\s*Logger\s+\p{javaJavaIdentifierStart}+\s+=\s+\QLoggerFactory.getLogger(MethodHandles.lookup().lookupClass());\E/$;
       def packagePattern = ~$/(?m)^\s*package\s+org\.apache.*;/$;
+      def xmlTagPattern = ~$/(?m)\s*<[a-zA-Z].*/$;
       def isLicense = { matcher, ratDocument ->
         licenseMatcher.reset();
         return lineSplitter.split(matcher.group(1)).any{ licenseMatcher.match(ratDocument, it) };
       }
+      def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
+        def contentMatcher = contentPattern.matcher(text);
+        if (contentMatcher.find()) {
+          def contentStartPos = contentMatcher.start();
+          def commentMatcher = commentPattern.matcher(text);
+          while (commentMatcher.find()) {
+            if (isLicense(commentMatcher, ratDocument)) {
+              if (commentMatcher.start() < contentStartPos) {
+                break; // This file is all good, so break loop: license header precedes 'description' definition
+              } else {
+                reportViolation(f, description+' declaration precedes license header');
+              }
+            }
+          }
+        }
+      }
+
       ant.fileScanner{
         fileset(dir: baseDir){
           extensions.each{
@@ -219,20 +239,10 @@
             reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
           }
         }
-        def packageMatcher = packagePattern.matcher(text);
-        if (packageMatcher.find()) {
-          def packageStartPos = packageMatcher.start();
-          def commentMatcher = commentPattern.matcher(text);
-          while (commentMatcher.find()) {
-            if (isLicense(commentMatcher, ratDocument)) {
-              if (commentMatcher.start() < packageStartPos) {
-                break; // This file is all good, so break loop: license header precedes package definition
-              } else {
-                reportViolation(f, 'package declaration precedes license header');
-              }
-            }
-          }
-        }
+        checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
+      }
+      if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) {
+        checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
+      }
     };
diff --git a/dev-tools/maven/lucene/analysis/common/pom.xml.template b/dev-tools/maven/lucene/analysis/common/pom.xml.template
index cded6281137..cbf98986678 100644
--- a/dev-tools/maven/lucene/analysis/common/pom.xml.template
+++ b/dev-tools/maven/lucene/analysis/common/pom.xml.template
@@ -1,24 +1,24 @@
+ - 4.0.0 org.apache.lucene
diff --git a/dev-tools/maven/lucene/analysis/icu/pom.xml.template 
b/dev-tools/maven/lucene/analysis/icu/pom.xml.template index 40b5f34d9e5..2396a1999ba 100644 --- a/dev-tools/maven/lucene/analysis/icu/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/icu/pom.xml.template @@ -1,24 +1,24 @@ - + - 4.0.0 org.apache.lucene @@ -31,8 +31,8 @@ jar Lucene ICU Analysis Components - Provides integration with ICU (International Components for Unicode) for - stronger Unicode and internationalization support. + Provides integration with ICU (International Components for Unicode) for + stronger Unicode and internationalization support. lucene/analysis/icu diff --git a/dev-tools/maven/lucene/analysis/kuromoji/pom.xml.template b/dev-tools/maven/lucene/analysis/kuromoji/pom.xml.template index e5877a41780..85bfc4b744b 100644 --- a/dev-tools/maven/lucene/analysis/kuromoji/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/kuromoji/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene @@ -31,7 +31,7 @@ jar Lucene Kuromoji Japanese Morphological Analyzer - Lucene Kuromoji Japanese Morphological Analyzer + Lucene Kuromoji Japanese Morphological Analyzer lucene/analysis/kuromoji diff --git a/dev-tools/maven/lucene/analysis/morfologik/pom.xml.template b/dev-tools/maven/lucene/analysis/morfologik/pom.xml.template index 79c512f119b..5676eca9a06 100644 --- a/dev-tools/maven/lucene/analysis/morfologik/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/morfologik/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene @@ -31,7 +31,7 @@ jar Lucene Morfologik Polish Lemmatizer - A dictionary-driven lemmatizer for Polish (includes morphosyntactic annotations) + A dictionary-driven lemmatizer for Polish (includes morphosyntactic annotations) lucene/analysis/morfologik diff --git a/dev-tools/maven/lucene/analysis/phonetic/pom.xml.template b/dev-tools/maven/lucene/analysis/phonetic/pom.xml.template index 06dcb9c7921..cc7a9da184e 100644 --- a/dev-tools/maven/lucene/analysis/phonetic/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/phonetic/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene @@ -31,7 +31,7 @@ jar Lucene Phonetic Filters - Provides phonetic encoding via Commons Codec. + Provides phonetic encoding via Commons Codec. 
lucene/analysis/phonetic diff --git a/dev-tools/maven/lucene/analysis/pom.xml.template b/dev-tools/maven/lucene/analysis/pom.xml.template index ba524804176..845fcb7346e 100644 --- a/dev-tools/maven/lucene/analysis/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/analysis/smartcn/pom.xml.template b/dev-tools/maven/lucene/analysis/smartcn/pom.xml.template index 30c46aa9307..105a1941319 100644 --- a/dev-tools/maven/lucene/analysis/smartcn/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/smartcn/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/analysis/stempel/pom.xml.template b/dev-tools/maven/lucene/analysis/stempel/pom.xml.template index 7e1346b8b51..acf5124d86a 100644 --- a/dev-tools/maven/lucene/analysis/stempel/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/stempel/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/analysis/uima/pom.xml.template b/dev-tools/maven/lucene/analysis/uima/pom.xml.template index 667a2f4ced6..36277a56c69 100644 --- a/dev-tools/maven/lucene/analysis/uima/pom.xml.template +++ b/dev-tools/maven/lucene/analysis/uima/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/backward-codecs/pom.xml.template b/dev-tools/maven/lucene/backward-codecs/pom.xml.template index 8522880a1f9..e40148fd556 100644 --- a/dev-tools/maven/lucene/backward-codecs/pom.xml.template +++ b/dev-tools/maven/lucene/backward-codecs/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/benchmark/pom.xml.template b/dev-tools/maven/lucene/benchmark/pom.xml.template index 1aa8e1ad153..282c997dd29 100644 --- a/dev-tools/maven/lucene/benchmark/pom.xml.template +++ b/dev-tools/maven/lucene/benchmark/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/classification/pom.xml.template b/dev-tools/maven/lucene/classification/pom.xml.template index ad26925aec5..73f516fcddb 100644 --- a/dev-tools/maven/lucene/classification/pom.xml.template +++ b/dev-tools/maven/lucene/classification/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/codecs/pom.xml.template b/dev-tools/maven/lucene/codecs/pom.xml.template index ef05ca1a957..c368ffe016c 100644 --- a/dev-tools/maven/lucene/codecs/pom.xml.template +++ b/dev-tools/maven/lucene/codecs/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/codecs/src/java/pom.xml.template b/dev-tools/maven/lucene/codecs/src/java/pom.xml.template index d23093ff242..64ddad9b9f6 100644 --- a/dev-tools/maven/lucene/codecs/src/java/pom.xml.template +++ b/dev-tools/maven/lucene/codecs/src/java/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/codecs/src/test/pom.xml.template b/dev-tools/maven/lucene/codecs/src/test/pom.xml.template index 7db591397d3..d1c9737a132 100644 --- a/dev-tools/maven/lucene/codecs/src/test/pom.xml.template +++ b/dev-tools/maven/lucene/codecs/src/test/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/core/pom.xml.template b/dev-tools/maven/lucene/core/pom.xml.template index affd40757c4..290ffa6b3ad 100644 --- a/dev-tools/maven/lucene/core/pom.xml.template +++ b/dev-tools/maven/lucene/core/pom.xml.template @@ -1,24 +1,24 @@ + - 
4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/core/src/java/pom.xml.template b/dev-tools/maven/lucene/core/src/java/pom.xml.template index 43302bfed80..6fc7ebeb5e5 100644 --- a/dev-tools/maven/lucene/core/src/java/pom.xml.template +++ b/dev-tools/maven/lucene/core/src/java/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/core/src/test/pom.xml.template b/dev-tools/maven/lucene/core/src/test/pom.xml.template index 1619ad86ca0..eca2885b03e 100644 --- a/dev-tools/maven/lucene/core/src/test/pom.xml.template +++ b/dev-tools/maven/lucene/core/src/test/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/demo/pom.xml.template b/dev-tools/maven/lucene/demo/pom.xml.template index 7e8eda4c4d2..52155617519 100644 --- a/dev-tools/maven/lucene/demo/pom.xml.template +++ b/dev-tools/maven/lucene/demo/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/expressions/pom.xml.template b/dev-tools/maven/lucene/expressions/pom.xml.template index a83ac393191..8a45b2eead7 100644 --- a/dev-tools/maven/lucene/expressions/pom.xml.template +++ b/dev-tools/maven/lucene/expressions/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/facet/pom.xml.template b/dev-tools/maven/lucene/facet/pom.xml.template index 6953b671aaf..982513cdb08 100644 --- a/dev-tools/maven/lucene/facet/pom.xml.template +++ b/dev-tools/maven/lucene/facet/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/grouping/pom.xml.template b/dev-tools/maven/lucene/grouping/pom.xml.template index daff88e0437..294c8ad7cff 100644 --- a/dev-tools/maven/lucene/grouping/pom.xml.template +++ b/dev-tools/maven/lucene/grouping/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/highlighter/pom.xml.template b/dev-tools/maven/lucene/highlighter/pom.xml.template index e6d81c3dfe0..82a26c806f0 100644 --- a/dev-tools/maven/lucene/highlighter/pom.xml.template +++ b/dev-tools/maven/lucene/highlighter/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/join/pom.xml.template b/dev-tools/maven/lucene/join/pom.xml.template index 61567d87b5c..66f3093628f 100644 --- a/dev-tools/maven/lucene/join/pom.xml.template +++ b/dev-tools/maven/lucene/join/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/memory/pom.xml.template b/dev-tools/maven/lucene/memory/pom.xml.template index b42d8b8ae5b..007e6d4f606 100644 --- a/dev-tools/maven/lucene/memory/pom.xml.template +++ b/dev-tools/maven/lucene/memory/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/misc/pom.xml.template b/dev-tools/maven/lucene/misc/pom.xml.template index a4ea5bc8f42..d27a557bd8f 100644 --- a/dev-tools/maven/lucene/misc/pom.xml.template +++ b/dev-tools/maven/lucene/misc/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/pom.xml.template b/dev-tools/maven/lucene/pom.xml.template index 8db3fd1c98c..410a937e006 100644 --- a/dev-tools/maven/lucene/pom.xml.template +++ b/dev-tools/maven/lucene/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/queries/pom.xml.template b/dev-tools/maven/lucene/queries/pom.xml.template index 7e555c5f523..74f5df5c349 100644 --- 
a/dev-tools/maven/lucene/queries/pom.xml.template +++ b/dev-tools/maven/lucene/queries/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/queryparser/pom.xml.template b/dev-tools/maven/lucene/queryparser/pom.xml.template index 715d44fb6a5..fb9d6a418fe 100644 --- a/dev-tools/maven/lucene/queryparser/pom.xml.template +++ b/dev-tools/maven/lucene/queryparser/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/replicator/pom.xml.template b/dev-tools/maven/lucene/replicator/pom.xml.template index c749ba482c5..4939aa7bb03 100644 --- a/dev-tools/maven/lucene/replicator/pom.xml.template +++ b/dev-tools/maven/lucene/replicator/pom.xml.template @@ -1,25 +1,25 @@ + - - 4.0.0 + 4.0.0 org.apache.lucene lucene-parent diff --git a/dev-tools/maven/lucene/sandbox/pom.xml.template b/dev-tools/maven/lucene/sandbox/pom.xml.template index baa8a3c04db..5db6a6b83ca 100644 --- a/dev-tools/maven/lucene/sandbox/pom.xml.template +++ b/dev-tools/maven/lucene/sandbox/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/spatial-extras/pom.xml.template b/dev-tools/maven/lucene/spatial-extras/pom.xml.template index 58a5aa8a731..c9f47145898 100644 --- a/dev-tools/maven/lucene/spatial-extras/pom.xml.template +++ b/dev-tools/maven/lucene/spatial-extras/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/spatial/pom.xml.template b/dev-tools/maven/lucene/spatial/pom.xml.template index 5f6420a4b6e..48f0e9cff31 100644 --- a/dev-tools/maven/lucene/spatial/pom.xml.template +++ b/dev-tools/maven/lucene/spatial/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene @@ -31,7 +31,7 @@ jar Lucene Spatial - Geospatial Indexing and Query for Apache Lucene + Geospatial Indexing and Query for Apache Lucene lucene/spatial diff --git a/dev-tools/maven/lucene/spatial3d/pom.xml.template b/dev-tools/maven/lucene/spatial3d/pom.xml.template index 18d943f7e73..27d3bb8c431 100644 --- a/dev-tools/maven/lucene/spatial3d/pom.xml.template +++ b/dev-tools/maven/lucene/spatial3d/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/suggest/pom.xml.template b/dev-tools/maven/lucene/suggest/pom.xml.template index 4e26ca604c8..d41eb7f3bb0 100644 --- a/dev-tools/maven/lucene/suggest/pom.xml.template +++ b/dev-tools/maven/lucene/suggest/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/lucene/test-framework/pom.xml.template b/dev-tools/maven/lucene/test-framework/pom.xml.template index 21a910c0819..3db5f5af1f0 100644 --- a/dev-tools/maven/lucene/test-framework/pom.xml.template +++ b/dev-tools/maven/lucene/test-framework/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index adfe5b639ca..cd8d6b8db77 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache diff --git a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template index b7a11a88b9f..a33c2785d84 100644 --- a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template +++ b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/analytics/pom.xml.template 
b/dev-tools/maven/solr/contrib/analytics/pom.xml.template index 8246e25485c..4e6940d0882 100644 --- a/dev-tools/maven/solr/contrib/analytics/pom.xml.template +++ b/dev-tools/maven/solr/contrib/analytics/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/clustering/pom.xml.template b/dev-tools/maven/solr/contrib/clustering/pom.xml.template index 652accb081b..ffe0550097b 100644 --- a/dev-tools/maven/solr/contrib/clustering/pom.xml.template +++ b/dev-tools/maven/solr/contrib/clustering/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template index d8515be021b..b9e6121fedb 100644 --- a/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template +++ b/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template index 06ccf70b684..39b75b0d7d5 100644 --- a/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template +++ b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/extraction/pom.xml.template b/dev-tools/maven/solr/contrib/extraction/pom.xml.template index 4a0ec2e545c..b9c7cfc65bf 100644 --- a/dev-tools/maven/solr/contrib/extraction/pom.xml.template +++ b/dev-tools/maven/solr/contrib/extraction/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/langid/pom.xml.template b/dev-tools/maven/solr/contrib/langid/pom.xml.template index b1165b2e7e0..d0065e53d9e 100644 --- a/dev-tools/maven/solr/contrib/langid/pom.xml.template +++ b/dev-tools/maven/solr/contrib/langid/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/ltr/pom.xml.template b/dev-tools/maven/solr/contrib/ltr/pom.xml.template index 67d74e755a6..4de59a2808d 100644 --- a/dev-tools/maven/solr/contrib/ltr/pom.xml.template +++ b/dev-tools/maven/solr/contrib/ltr/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/map-reduce/pom.xml.template b/dev-tools/maven/solr/contrib/map-reduce/pom.xml.template index 5165e7a10af..623c8cb78c0 100644 --- a/dev-tools/maven/solr/contrib/map-reduce/pom.xml.template +++ b/dev-tools/maven/solr/contrib/map-reduce/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template b/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template index 6c5b9c396e0..2849e90c17b 100644 --- a/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template +++ b/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template b/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template index beefa83f186..a14707d1bbe 100644 --- a/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template +++ b/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template index 6ca72b21837..20cbac5ec49 100644 --- 
a/dev-tools/maven/solr/contrib/pom.xml.template +++ b/dev-tools/maven/solr/contrib/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/uima/pom.xml.template b/dev-tools/maven/solr/contrib/uima/pom.xml.template index 1dae4e77e52..115c974d223 100644 --- a/dev-tools/maven/solr/contrib/uima/pom.xml.template +++ b/dev-tools/maven/solr/contrib/uima/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/contrib/velocity/pom.xml.template b/dev-tools/maven/solr/contrib/velocity/pom.xml.template index 9faab82faf4..75b5e0a40ff 100644 --- a/dev-tools/maven/solr/contrib/velocity/pom.xml.template +++ b/dev-tools/maven/solr/contrib/velocity/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/core/pom.xml.template b/dev-tools/maven/solr/core/pom.xml.template index c36dacbe033..17fb331ac9f 100644 --- a/dev-tools/maven/solr/core/pom.xml.template +++ b/dev-tools/maven/solr/core/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/core/src/java/pom.xml.template b/dev-tools/maven/solr/core/src/java/pom.xml.template index 949a0a75c85..8255958654d 100644 --- a/dev-tools/maven/solr/core/src/java/pom.xml.template +++ b/dev-tools/maven/solr/core/src/java/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/core/src/test/pom.xml.template b/dev-tools/maven/solr/core/src/test/pom.xml.template index 48de292a423..a4e979faded 100644 --- a/dev-tools/maven/solr/core/src/test/pom.xml.template +++ b/dev-tools/maven/solr/core/src/test/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/pom.xml.template b/dev-tools/maven/solr/pom.xml.template index 6f2eb6d4152..956b5049b2d 100644 --- a/dev-tools/maven/solr/pom.xml.template +++ b/dev-tools/maven/solr/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.lucene diff --git a/dev-tools/maven/solr/solrj/pom.xml.template b/dev-tools/maven/solr/solrj/pom.xml.template index 2392e346bde..ac6a26d9dff 100644 --- a/dev-tools/maven/solr/solrj/pom.xml.template +++ b/dev-tools/maven/solr/solrj/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/solrj/src/java/pom.xml.template b/dev-tools/maven/solr/solrj/src/java/pom.xml.template index e384739eb01..91e6e5adb09 100644 --- a/dev-tools/maven/solr/solrj/src/java/pom.xml.template +++ b/dev-tools/maven/solr/solrj/src/java/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/solrj/src/test/pom.xml.template b/dev-tools/maven/solr/solrj/src/test/pom.xml.template index 2946713536c..1fc2c5526e8 100644 --- a/dev-tools/maven/solr/solrj/src/test/pom.xml.template +++ b/dev-tools/maven/solr/solrj/src/test/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr diff --git a/dev-tools/maven/solr/test-framework/pom.xml.template b/dev-tools/maven/solr/test-framework/pom.xml.template index 17dab4c6f77..2ffb0393769 100644 --- a/dev-tools/maven/solr/test-framework/pom.xml.template +++ b/dev-tools/maven/solr/test-framework/pom.xml.template @@ -1,24 +1,24 @@ + - 4.0.0 org.apache.solr From ac3f1bb339df530d6d4484f26c9ab2da17bd28df Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 21 Dec 2016 17:18:39 +0000 Subject: [PATCH 23/83] SOLR-8542: reduce direct solrconfig-ltr.xml references in solr/contrib/ltr tests --- .../org/apache/solr/ltr/TestLTRQParserExplain.java | 2 +- 
.../test/org/apache/solr/ltr/TestLTRQParserPlugin.java | 5 +---- .../src/test/org/apache/solr/ltr/TestLTRWithFacet.java | 2 +- .../src/test/org/apache/solr/ltr/TestLTRWithSort.java | 2 +- .../src/test/org/apache/solr/ltr/TestRerankBase.java | 10 ++++------ .../apache/solr/ltr/TestSelectiveWeightCreation.java | 2 +- .../solr/ltr/feature/TestEdisMaxSolrFeature.java | 2 +- .../apache/solr/ltr/feature/TestExternalFeatures.java | 2 +- .../solr/ltr/feature/TestExternalValueFeatures.java | 2 +- .../apache/solr/ltr/feature/TestFeatureLogging.java | 2 +- .../solr/ltr/feature/TestFeatureLtrScoringModel.java | 2 +- .../org/apache/solr/ltr/feature/TestFeatureStore.java | 2 +- .../solr/ltr/feature/TestFieldLengthFeature.java | 2 +- .../apache/solr/ltr/feature/TestFieldValueFeature.java | 2 +- .../apache/solr/ltr/feature/TestFilterSolrFeature.java | 2 +- .../solr/ltr/feature/TestNoMatchSolrFeature.java | 2 +- .../solr/ltr/feature/TestOriginalScoreFeature.java | 2 +- .../apache/solr/ltr/feature/TestRankingFeature.java | 2 +- .../solr/ltr/feature/TestUserTermScoreWithQ.java | 2 +- .../solr/ltr/feature/TestUserTermScorerQuery.java | 2 +- .../solr/ltr/feature/TestUserTermScorereQDF.java | 2 +- .../org/apache/solr/ltr/feature/TestValueFeature.java | 2 +- .../org/apache/solr/ltr/model/TestLinearModel.java | 2 +- .../solr/ltr/model/TestMultipleAdditiveTreesModel.java | 2 +- .../apache/solr/ltr/store/rest/TestModelManager.java | 2 +- .../ltr/store/rest/TestModelManagerPersistence.java | 2 +- 26 files changed, 29 insertions(+), 34 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserExplain.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserExplain.java index 2f90df841f9..3d7d3e599cb 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserExplain.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserExplain.java @@ -26,7 +26,7 @@ public class TestLTRQParserExplain extends TestRerankBase { @BeforeClass public static void setup() throws Exception { - setuptest(); + setuptest(true); loadFeatures("features-store-test-model.json"); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java index b2b0a8d2088..d4457a0a7a2 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java @@ -26,9 +26,7 @@ public class TestLTRQParserPlugin extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); - // store = getModelStore(); - bulkIndex(); + setuptest(true); loadFeatures("features-linear.json"); loadModels("linear-model.json"); @@ -37,7 +35,6 @@ public class TestLTRQParserPlugin extends TestRerankBase { @AfterClass public static void after() throws Exception { aftertest(); - // store.clear(); } @Test diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithFacet.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithFacet.java index ab519ec24ab..4026bbbfd7e 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithFacet.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithFacet.java @@ -28,7 +28,7 @@ public class TestLTRWithFacet extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "a1", 
"description", "E", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithSort.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithSort.java index 1fbe1d5fe58..d120af1c283 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithSort.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRWithSort.java @@ -28,7 +28,7 @@ public class TestLTRWithSort extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "a1", "description", "E", "popularity", "1")); assertU(adoc("id", "2", "title", "a1 b1", "description", diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java index 4914d28cb96..792975a112a 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java @@ -75,14 +75,14 @@ public class TestRerankBase extends RestTestBase { protected static File fstorefile = null; protected static File mstorefile = null; - public static void setuptest() throws Exception { + protected static void setuptest(boolean bulkIndex) throws Exception { setuptest("solrconfig-ltr.xml", "schema.xml"); - bulkIndex(); + if (bulkIndex) bulkIndex(); } - public static void setupPersistenttest() throws Exception { + protected static void setupPersistenttest(boolean bulkIndex) throws Exception { setupPersistentTest("solrconfig-ltr.xml", "schema.xml"); - bulkIndex(); + if (bulkIndex) bulkIndex(); } public static ManagedFeatureStore getManagedFeatureStore() { @@ -178,8 +178,6 @@ public class TestRerankBase extends RestTestBase { FileUtils.deleteDirectory(tmpSolrHome); System.clearProperty("managed.schema.mutable"); // System.clearProperty("enable.update.log"); - - } public static void makeRestTestHarnessNull() { diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java index b9b3d63f54f..e44d4ac5496 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java @@ -104,7 +104,7 @@ public class TestSelectiveWeightCreation extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1 w3", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestEdisMaxSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestEdisMaxSolrFeature.java index cd63b5c17e2..96a1d28cce3 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestEdisMaxSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestEdisMaxSolrFeature.java @@ -27,7 +27,7 @@ public class TestEdisMaxSolrFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java index 15b76338a54..e27844bba72 100644 
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java @@ -27,7 +27,7 @@ public class TestExternalFeatures extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java index fc0ade253ff..8a09bb380c7 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java @@ -27,7 +27,7 @@ public class TestExternalValueFeatures extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java index f18c6bf8f22..ad431f5a5e7 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java @@ -29,7 +29,7 @@ public class TestFeatureLogging extends TestRerankBase { @BeforeClass public static void setup() throws Exception { - setuptest(); + setuptest(true); } @AfterClass diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLtrScoringModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLtrScoringModel.java index 5fcebad884b..a50e75ee97a 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLtrScoringModel.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLtrScoringModel.java @@ -29,7 +29,7 @@ public class TestFeatureLtrScoringModel extends TestRerankBase { @BeforeClass public static void setup() throws Exception { - setuptest(); + setuptest(true); store = getManagedFeatureStore(); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureStore.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureStore.java index 0ed0cdac3a2..ca58b7b4d72 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureStore.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureStore.java @@ -32,7 +32,7 @@ public class TestFeatureStore extends TestRerankBase { @BeforeClass public static void setup() throws Exception { - setuptest(); + setuptest(true); fstore = getManagedFeatureStore(); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldLengthFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldLengthFeature.java index 4a0d4490a73..2e1a14d607b 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldLengthFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldLengthFeature.java @@ -27,7 +27,7 @@ public class TestFieldLengthFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", 
"description", "w1")); assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description", diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 8295403f386..e4a132ad8f5 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -30,7 +30,7 @@ public class TestFieldValueFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1","isTrendy","true")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java index a6a80bd35c2..23b7a55ea83 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java @@ -28,7 +28,7 @@ import org.junit.Test; public class TestFilterSolrFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java index 004e3148d2c..e6c287d3a0e 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java @@ -33,7 +33,7 @@ public class TestNoMatchSolrFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java index 48662e64887..d6512246896 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java @@ -32,7 +32,7 @@ public class TestOriginalScoreFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1")); assertU(adoc("id", "2", "title", "w2")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestRankingFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestRankingFeature.java index 437e10d2558..31add41fb43 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestRankingFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestRankingFeature.java @@ -28,7 +28,7 @@ public class TestRankingFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git 
a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScoreWithQ.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScoreWithQ.java index 754409a658c..b5c8e32ea84 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScoreWithQ.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScoreWithQ.java @@ -27,7 +27,7 @@ public class TestUserTermScoreWithQ extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorerQuery.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorerQuery.java index c79207c644e..c0f92d220d3 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorerQuery.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorerQuery.java @@ -27,7 +27,7 @@ public class TestUserTermScorerQuery extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorereQDF.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorereQDF.java index f47a883c37a..6b22cdfde23 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorereQDF.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestUserTermScorereQDF.java @@ -27,7 +27,7 @@ public class TestUserTermScorereQDF extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", "1")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestValueFeature.java index 084da4a3695..de0c9239640 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestValueFeature.java @@ -27,7 +27,7 @@ public class TestValueFeature extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1")); assertU(adoc("id", "2", "title", "w2")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java index e8ee22482cb..067bd277de8 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java @@ -50,7 +50,7 @@ public class TestLinearModel extends TestRerankBase { @BeforeClass public static void setup() throws Exception { - setuptest(); + setuptest(true); // loadFeatures("features-store-test-model.json"); store = getManagedModelStore(); fstore = getManagedFeatureStore().getFeatureStore("test"); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java index 
560437078cb..1824eb4e0a6 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java @@ -30,7 +30,7 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase { @BeforeClass public static void before() throws Exception { - setuptest("solrconfig-ltr.xml", "schema.xml"); + setuptest(false); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity","1")); assertU(adoc("id", "2", "title", "w2", "description", "w2", "popularity","2")); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java index 8d11a907fbc..5b43c6064a8 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java @@ -34,7 +34,7 @@ public class TestModelManager extends TestRerankBase { @BeforeClass public static void init() throws Exception { - setuptest(); + setuptest(true); } @Before diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java index 66c26fd7255..f707e55c583 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java @@ -31,7 +31,7 @@ public class TestModelManagerPersistence extends TestRerankBase { @Before public void init() throws Exception { - setupPersistenttest(); + setupPersistenttest(true); } // executed first From f62874e47a0c790b9e396f58ef6f14ea04e2280b Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 22 Dec 2016 15:31:20 +0000 Subject: [PATCH 24/83] SOLR-8542: change default feature vector format (to 'dense' from 'sparse') also: increase test coverage w.r.t. 'sparse' vs. 'dense' vs. 'default' feature vector format --- solr/contrib/ltr/README.md | 2 +- .../LTRFeatureLoggerTransformerFactory.java | 29 +++--- .../solr/collection1/conf/solrconfig-ltr.xml | 1 + .../org/apache/solr/ltr/TestRerankBase.java | 44 +++++++++ .../ltr/feature/TestExternalFeatures.java | 35 +++++-- .../feature/TestExternalValueFeatures.java | 19 +++- .../solr/ltr/feature/TestFeatureLogging.java | 28 ++++-- .../ltr/feature/TestFilterSolrFeature.java | 4 +- .../ltr/feature/TestNoMatchSolrFeature.java | 92 +++++++++++++++++-- 9 files changed, 208 insertions(+), 46 deletions(-) diff --git a/solr/contrib/ltr/README.md b/solr/contrib/ltr/README.md index 5c95056c678..83fb279a6db 100644 --- a/solr/contrib/ltr/README.md +++ b/solr/contrib/ltr/README.md @@ -324,7 +324,7 @@ produce the features without doing the reranking: `fl=*,score,[features store=yourFeatureStore format=[dense|sparse] ]` This will return the values of the features in the given store. The format of the -extracted features will be based on the format parameter. The default is sparse. +extracted features will be based on the format parameter. The default is dense. # Assemble training data In order to train a learning to rank model you need training data. 
Training data is diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java index 354ecc27044..9585a7f97ab 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.lucene.index.LeafReaderContext; @@ -59,7 +60,7 @@ import org.slf4j.LoggerFactory; * will default to the features used by your reranking model.
    * efi.* - External feature information variables required by the features * you are extracting.
    - * format - The format you want the features to be returned in. Supports (dense|sparse). Defaults to sparse.
    + * format - The format you want the features to be returned in. Supports (dense|sparse). Defaults to dense.
    */ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { @@ -77,7 +78,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { private String fvCacheName; private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME; private String defaultStore; - private String defaultFormat; + private FeatureLogger.FeatureFormat defaultFormat = FeatureLogger.FeatureFormat.DENSE; private char csvKeyValueDelimiter = CSVFeatureLogger.DEFAULT_KEY_VALUE_SEPARATOR; private char csvFeatureSeparator = CSVFeatureLogger.DEFAULT_FEATURE_SEPARATOR; @@ -96,7 +97,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { } public void setDefaultFormat(String defaultFormat) { - this.defaultFormat = defaultFormat; + this.defaultFormat = FeatureLogger.FeatureFormat.valueOf(defaultFormat.toUpperCase(Locale.ROOT)); } public void setCsvKeyValueDelimiter(String csvKeyValueDelimiter) { @@ -133,7 +134,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { // Create and supply the feature logger to be used SolrQueryRequestContextUtils.setFeatureLogger(req, createFeatureLogger( - localparams.get(FV_FORMAT, defaultFormat))); + localparams.get(FV_FORMAT))); return new FeatureTransformer(name, localparams, req); } @@ -147,23 +148,17 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { * * @return a feature logger for the format specified. */ - private FeatureLogger createFeatureLogger(String featureFormat) { - final FeatureLogger.FeatureFormat f; - if (featureFormat == null || featureFormat.isEmpty() || - featureFormat.equals("sparse")) { - f = FeatureLogger.FeatureFormat.SPARSE; - } - else if (featureFormat.equals("dense")) { - f = FeatureLogger.FeatureFormat.DENSE; - } - else { - f = FeatureLogger.FeatureFormat.SPARSE; - log.warn("unknown feature logger feature format {}", featureFormat); + private FeatureLogger createFeatureLogger(String formatStr) { + final FeatureLogger.FeatureFormat format; + if (formatStr != null) { + format = FeatureLogger.FeatureFormat.valueOf(formatStr.toUpperCase(Locale.ROOT)); + } else { + format = this.defaultFormat; } if (fvCacheName == null) { throw new IllegalArgumentException("a fvCacheName must be configured"); } - return new CSVFeatureLogger(fvCacheName, f, csvKeyValueDelimiter, csvFeatureSeparator); + return new CSVFeatureLogger(fvCacheName, format, csvKeyValueDelimiter, csvFeatureSeparator); } class FeatureTransformer extends DocTransformer { diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml index 1e1a6183be7..0e92546723f 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml @@ -36,6 +36,7 @@ enclosed between brackets (in this case [fv]). 
In order to get the feature vector you will have to specify that you want the field (e.g., fl="*,[fv]) --> + <str name="defaultFormat">${solr.ltr.transformer.fv.defaultFormat:dense}</str> <str name="fvCacheName">QUERY_DOC_FV</str> diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java index 792975a112a..52778219c61 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java @@ -75,12 +75,55 @@ public class TestRerankBase extends RestTestBase { protected static File fstorefile = null; protected static File mstorefile = null;
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); } @Test @@ -104,7 +111,7 @@ public class TestExternalFeatures extends TestRerankBase { query.setQuery("*:*"); query.add("rows", "1"); - final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector( "confidence","2.3", "originalScore","1.0"); // Features we're extracting depend on external feature info not passed in @@ -114,13 +121,13 @@ public class TestExternalFeatures extends TestRerankBase { // Adding efi in features section should make it work query.remove("fl"); query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'"); // Adding efi in transformer + rq should still use the transformer's params for feature extraction query.remove("fl"); query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]"); query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query=w3}"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'"); } @Test @@ -129,10 +136,18 @@ public class TestExternalFeatures extends TestRerankBase { query.setQuery("*:*"); query.add("rows", "1"); + final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "confidence","0.0", + "originalScore","0.0"); + final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "originalScore","0.0"); + + final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv); + // Efi is explicitly not required, so we do not score the feature query.remove("fl"); query.add("fl", "fvalias:[fv store=fstore2]"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'"); } @Test @@ -141,10 +156,18 @@ public class TestExternalFeatures extends TestRerankBase { query.setQuery("*:*"); query.add("rows", "1"); + final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "occurrences","0.0", + "originalScore","0.0"); + final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "originalScore","0.0"); + + final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv); + // Efi is explicitly not required, so we do not score the feature query.remove("fl"); query.add("fl", "fvalias:[fv store=fstore3]"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'"); } @Test diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java index 8a09bb380c7..2de23a544e2 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java +++ 
b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java @@ -58,9 +58,17 @@ public class TestExternalValueFeatures extends TestRerankBase { query.add("rows", "3"); query.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature efi.user_device_tablet=1}"); + final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "user_device_smartphone","0.0", + "user_device_tablet","1.0"); + final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "user_device_tablet","1.0"); + + final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/features=='"+FeatureLoggerTestUtils.toFeatureVector("user_device_tablet","1.0")+"'"); + "/response/docs/[0]/features=='"+docs0features_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==65.0"); } @@ -76,9 +84,16 @@ public class TestExternalValueFeatures extends TestRerankBase { query .add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature}"); + final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "user_device_smartphone","0.0", + "user_device_tablet","0.0"); + final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/features==''"); + "/response/docs/[0]/features=='"+docs0features_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java index ad431f5a5e7..6f811d92021 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java @@ -56,6 +56,13 @@ public class TestFeatureLogging extends TestRerankBase { "c1", "c2", "c3"}, "test1", "{\"weights\":{\"c1\":1.0,\"c2\":1.0,\"c3\":1.0}}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "c1","1.0", + "c2","2.0", + "c3","3.0", + "pop","2.0", + "nomatch","0.0", + "yesmatch","1.0"); final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c1","1.0", "c2","2.0", @@ -63,6 +70,8 @@ public class TestFeatureLogging extends TestRerankBase { "pop","2.0", "yesmatch","1.0"); + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final SolrQuery query = new SolrQuery(); query.setQuery("title:bloomberg"); query.add("fl", "title,description,id,popularity,[fv]"); @@ -73,7 +82,7 @@ public class TestFeatureLogging extends TestRerankBase { restTestHarness.query("/query" + query.toQueryString()); assertJQ( "/query" + query.toQueryString(), - "/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_sparse_csv+"'}"); + "/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_default_csv+"'}"); query.remove("fl"); query.add("fl", "[fv]"); @@ -82,7 +91,7 @@ 
public class TestFeatureLogging extends TestRerankBase { restTestHarness.query("/query" + query.toQueryString()); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+docs0fv_sparse_csv+"'}"); + "/response/docs/[0]/=={'[fv]':'"+docs0fv_default_csv+"'}"); } @Test @@ -157,7 +166,7 @@ public class TestFeatureLogging extends TestRerankBase { query.add("rq", "{!ltr reRankDocs=3 model=sumgroup}"); - final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector( "c1","1.0", "c2","2.0", "c3","3.0", @@ -166,7 +175,7 @@ public class TestFeatureLogging extends TestRerankBase { restTestHarness.query("/query" + query.toQueryString()); assertJQ( "/query" + query.toQueryString(), - "/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_sparse_csv+"'}"); + "/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_csv+"'}"); } @Test @@ -181,25 +190,28 @@ public class TestFeatureLogging extends TestRerankBase { "{\"weights\":{\"match\":1.0}}"); final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0"); - final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("c4", "1.0"); + final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c4", "1.0"); final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0"); final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "0.0", "c4", "1.0"); + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv); + final SolrQuery query = new SolrQuery(); query.setQuery("title:bloomberg"); query.add("rows", "10"); query.add("rq", "{!ltr reRankDocs=10 model=sum4}"); - //csv - no feature format check (default to sparse) + //csv - no feature format specified i.e. 
use default query.remove("fl"); query.add("fl", "*,score,fv:[fv store=test4]"); assertJQ( "/query" + query.toQueryString(), - "/response/docs/[0]/fv/=='"+docs0fv_sparse_csv+"'"); + "/response/docs/[0]/fv/=='"+docs0fv_default_csv+"'"); assertJQ( "/query" + query.toQueryString(), - "/response/docs/[1]/fv/=='"+docs1fv_sparse_csv+"'"); + "/response/docs/[1]/fv/=='"+docs1fv_default_csv+"'"); //csv - sparse feature format check query.remove("fl"); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java index 23b7a55ea83..bb52f39c161 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java @@ -97,13 +97,13 @@ public class TestFilterSolrFeature extends TestRerankBase { query.add("rq", "{!ltr reRankDocs=4 model=fqmodel efi.user_query=w2}"); query.add("fl", "fv:[fv]"); - final String docs0fv_sparse_csv= FeatureLoggerTestUtils.toFeatureVector( + final String docs0fv_csv= FeatureLoggerTestUtils.toFeatureVector( "matchedTitle","1.0", "popularity","3.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='2'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='1'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_sparse_csv+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_csv+"'"); } } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java index e6c287d3a0e..c068be95cc5 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java @@ -105,20 +105,48 @@ public class TestNoMatchSolrFeature extends TestRerankBase { final Double doc0Score = (Double) ((Map) ((ArrayList) ((Map) jsonParse .get("response")).get("docs")).get(0)).get("score"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature",doc0Score.toString(), + "nomatchfeature2","0.0"); + final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0"); + final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0"); + final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0"); + + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "yesmatchfeature",doc0Score.toString()); + final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv); + final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv); + final String 
docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==" + (doc0Score * 1.1)); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'"); + "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='2'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='4'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'"); } @Test @@ -142,15 +170,47 @@ public class TestNoMatchSolrFeature extends TestRerankBase { final Double doc0Score = (Double) ((Map) ((ArrayList) ((Map) jsonParse .get("response")).get("docs")).get(0)).get("score"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature",doc0Score.toString(), + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "yesmatchfeature",doc0Score.toString()); + final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv); + final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv); + final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'"); + "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); - 
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'"); } @Test @@ -163,8 +223,14 @@ public class TestNoMatchSolrFeature extends TestRerankBase { query.add("fv", "true"); query.add("rq", "{!ltr model=nomatchmodel3 reRankDocs=4}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature4","0.0"); + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); } @Test @@ -184,9 +250,15 @@ public class TestNoMatchSolrFeature extends TestRerankBase { query.add("rows", "4"); query.add("rq", "{!ltr model=nomatchmodel4 reRankDocs=4}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature4","0.0"); + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); } } From bc8936a567e40eef3b70665fd8838548350c9aaa Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 23 Dec 2016 16:01:05 +0000 Subject: [PATCH 25/83] SOLR-9660: rename GroupSpecification's sortSpecWithinGroup to withinGroupSortSpec (Judith Silverman via Christine Poerschke) --- .../solr/handler/component/QueryComponent.java | 18 +++++++++--------- .../component/QueryElevationComponent.java | 4 ++-- .../search/grouping/GroupingSpecification.java | 18 +++++++++--------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 84ade43c7a5..9bd5efb32a5 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -255,23 +255,23 @@ public class QueryComponent extends SearchComponent final SortSpec groupSortSpec = searcher.weightSortSpec(sortSpec, Sort.RELEVANCE); // groupSort defaults to sort - String sortWithinGroupStr = params.get(GroupParams.GROUP_SORT); + String withinGroupSortStr = params.get(GroupParams.GROUP_SORT); //TODO: move weighting of sort - final SortSpec sortSpecWithinGroup; - if (sortWithinGroupStr != null) { - SortSpec parsedSortSpecWithinGroup = SortSpecParsing.parseSortSpec(sortWithinGroupStr, req); - sortSpecWithinGroup = 
searcher.weightSortSpec(parsedSortSpecWithinGroup, Sort.RELEVANCE); + final SortSpec withinGroupSortSpec; + if (withinGroupSortStr != null) { + SortSpec parsedWithinGroupSortSpec = SortSpecParsing.parseSortSpec(withinGroupSortStr, req); + withinGroupSortSpec = searcher.weightSortSpec(parsedWithinGroupSortSpec, Sort.RELEVANCE); } else { - sortSpecWithinGroup = new SortSpec( + withinGroupSortSpec = new SortSpec( groupSortSpec.getSort(), groupSortSpec.getSchemaFields(), groupSortSpec.getCount(), groupSortSpec.getOffset()); } - sortSpecWithinGroup.setOffset(params.getInt(GroupParams.GROUP_OFFSET, 0)); - sortSpecWithinGroup.setCount(params.getInt(GroupParams.GROUP_LIMIT, 1)); + withinGroupSortSpec.setOffset(params.getInt(GroupParams.GROUP_OFFSET, 0)); + withinGroupSortSpec.setCount(params.getInt(GroupParams.GROUP_LIMIT, 1)); - groupingSpec.setSortSpecWithinGroup(sortSpecWithinGroup); + groupingSpec.setWithinGroupSortSpec(withinGroupSortSpec); groupingSpec.setGroupSortSpec(groupSortSpec); String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name()); diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index f72fc89a66a..25157cfb7ba 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -465,10 +465,10 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore if (modGroupSortSpec != null) { groupingSpec.setGroupSortSpec(modGroupSortSpec); } - SortSpec withinGroupSortSpec = groupingSpec.getSortSpecWithinGroup(); + SortSpec withinGroupSortSpec = groupingSpec.getWithinGroupSortSpec(); SortSpec modWithinGroupSortSpec = this.modifySortSpec(withinGroupSortSpec, force, comparator); if (modWithinGroupSortSpec != null) { - groupingSpec.setSortSpecWithinGroup(modWithinGroupSortSpec); + groupingSpec.setWithinGroupSortSpec(modWithinGroupSortSpec); } } } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java index 4194dd087f8..e1e276e5a28 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java @@ -31,7 +31,7 @@ public class GroupingSpecification { private String[] queries = new String[]{}; private String[] functions = new String[]{}; private SortSpec groupSortSpec; - private SortSpec sortSpecWithinGroup; + private SortSpec withinGroupSortSpec; private boolean includeGroupCount; private boolean main; private Grouping.Format responseFormat; @@ -76,7 +76,7 @@ public class GroupingSpecification { @Deprecated public int getWithinGroupOffset() { - return sortSpecWithinGroup.getOffset(); + return withinGroupSortSpec.getOffset(); } @Deprecated public int getGroupOffset() { @@ -86,8 +86,9 @@ public class GroupingSpecification { @Deprecated public int getWithinGroupLimit() { - return sortSpecWithinGroup.getCount(); + return withinGroupSortSpec.getCount(); } + @Deprecated public int getGroupLimit() { return getWithinGroupLimit(); @@ -114,10 +115,9 @@ public class GroupingSpecification { @Deprecated public Sort getSortWithinGroup() { - return sortSpecWithinGroup.getSort(); + return withinGroupSortSpec.getSort(); } - public boolean isIncludeGroupCount() { return includeGroupCount; } @@ 
-166,12 +166,12 @@ public class GroupingSpecification { this.groupSortSpec = groupSortSpec; } - public SortSpec getSortSpecWithinGroup() { - return sortSpecWithinGroup; + public SortSpec getWithinGroupSortSpec() { + return withinGroupSortSpec; } - public void setSortSpecWithinGroup(SortSpec sortSpecWithinGroup) { - this.sortSpecWithinGroup = sortSpecWithinGroup; + public void setWithinGroupSortSpec(SortSpec withinGroupSortSpec) { + this.withinGroupSortSpec = withinGroupSortSpec; } } From 687f03661d726d70d5718db8b45990b3fbc98183 Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Fri, 23 Dec 2016 23:01:47 +0100 Subject: [PATCH 26/83] SOLR-9805 Fix NPE. --- solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java index 77c4e1aa1c4..6d597cb0056 100644 --- a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java @@ -44,7 +44,7 @@ public class JvmMetricsTest extends SolrJettyTestBase { assertTrue(metrics.size() > 0); for (String metric : OperatingSystemMetricSet.METRICS) { Gauge gauge = (Gauge)metrics.get(metric); - if (gauge == null) { // some are optional depending on OS + if (gauge == null || gauge.getValue() == null) { // some are optional depending on OS continue; } double value = ((Number)gauge.getValue()).doubleValue(); From 54d8574f9662f89598a06fbb47de9a376ef5d2bc Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Sun, 25 Dec 2016 10:00:50 +0300 Subject: [PATCH 27/83] SOLR-9448: providing a test for workaround of a differently named uniqueKey field --- solr/CHANGES.txt | 2 + .../schema-minimal-with-another-uniqkey.xml | 23 ++++++++ .../TestSubQueryTransformerDistrib.java | 55 +++++++++++-------- 3 files changed, 56 insertions(+), 24 deletions(-) create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-minimal-with-another-uniqkey.xml diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b2ec5efb500..a6ea07b6e22 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -332,6 +332,8 @@ Other Changes * SOLR-9758: refactor preferLocalShards implementation (Christine Poerschke) +* SOLR-9448: providing a test to workaround a differently named uniqueKey field (Mikhail Khludnev) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. 
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-minimal-with-another-uniqkey.xml b/solr/core/src/test-files/solr/collection1/conf/schema-minimal-with-another-uniqkey.xml new file mode 100644 index 00000000000..2087a1daae0 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-minimal-with-another-uniqkey.xml @@ -0,0 +1,23 @@ + + + + + notid + + + diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java index c417272bb23..620cac0a942 100644 --- a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java +++ b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Map; import java.util.Random; -import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; @@ -42,15 +41,19 @@ import org.apache.solr.common.util.ContentStreamBase; import org.junit.BeforeClass; import org.junit.Test; -@SuppressSSL public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { + private static final String support = "These guys help customers"; + private static final String engineering = "These guys develop stuff"; final static String people = "people"; final static String depts = "departments"; + private static boolean differentUniqueId; @BeforeClass public static void setupCluster() throws Exception { + differentUniqueId = random().nextBoolean(); + final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); String configName = "solrCloudCollectionConfig"; @@ -72,7 +75,9 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { CollectionAdminRequest.createCollection(depts, configName, shards, replicas) .withProperty("config", "solrconfig-doctransformers.xml") - .withProperty("schema", "schema-docValuesJoin.xml") + .withProperty("schema", + differentUniqueId ? "schema-minimal-with-another-uniqkey.xml": + "schema-docValuesJoin.xml") .process(cluster.getSolrClient()); CloudSolrClient client = cluster.getSolrClient(); @@ -102,28 +107,22 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { "fl","*,depts:[subquery "+((random1.nextBoolean() ? "" : "separator=,"))+"]", "rows","" + peopleMultiplier, "depts.q","{!terms f=dept_id_s v=$row.dept_ss_dv "+((random1.nextBoolean() ? "" : "separator=,"))+"}", - "depts.fl","text_t", + "depts.fl","text_t"+(differentUniqueId?",id:notid":""), "depts.indent","true", "depts.collection","departments", + differentUniqueId ? 
"depts.distrib.singlePass":"notnecessary","true", "depts.rows",""+(deptMultiplier*2), - "depts.logParamsList","q,fl,rows,row.dept_ss_dv"})); + "depts.logParamsList","q,fl,rows,row.dept_ss_dv", + random().nextBoolean()?"depts.wt":"whatever",anyWt(), + random().nextBoolean()?"wt":"whatever",anyWt()})); final QueryResponse rsp = new QueryResponse(); rsp.setResponse(cluster.getSolrClient().request(qr, people)); final SolrDocumentList hits = rsp.getResults(); assertEquals(peopleMultiplier, hits.getNumFound()); - Map engText = new HashMap() { - { put("text_t", "These guys develop stuff"); - } - }; - Map suppText = new HashMap() { - { put("text_t", "These guys help customers"); - } - }; - - int engineer = 0; - int support = 0; + int engineerCount = 0; + int supportCount = 0; for (int res : new int [] {0, (peopleMultiplier-1) /2, peopleMultiplier-1}) { SolrDocument doc = hits.get(res); @@ -133,16 +132,23 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { deptMultiplier * 2, relDepts.getNumFound()); for (int deptN = 0 ; deptN < relDepts.getNumFound(); deptN++ ) { SolrDocument deptDoc = relDepts.get(deptN); - assertTrue(deptDoc + "should be either "+engText +" or "+suppText, - (engText.equals(deptDoc) && ++engineer>0) || - (suppText.equals(deptDoc) && ++support>0)); + String actual = (String) deptDoc.get("text_t"); + assertTrue(deptDoc + "should be either "+engineering +" or "+support, + (engineering.equals(actual) && ++engineerCount>0) || + (support.equals(actual) && ++supportCount>0)); } } - assertEquals(hits.toString(), engineer, support); + assertEquals(hits.toString(), engineerCount, supportCount); } } + private String anyWt() { + String[] wts = new String[]{"javabin","xml","json"}; + return wts[random().nextInt(wts.length)]; + } + + private void createIndex(String people, int peopleMultiplier, String depts, int deptMultiplier) throws SolrServerException, IOException { @@ -175,14 +181,15 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { addDocs(people, peopleDocs); List deptsDocs = new ArrayList<>(); + String deptIdField = differentUniqueId? 
"notid":"id"; for (int d=0; d < deptMultiplier; d++) { - deptsDocs.add(add(doc("id",""+id++, "dept_id_s", "Engineering", "text_t","These guys develop stuff", "salary_i_dv", "1000", + deptsDocs.add(add(doc(deptIdField,""+id++, "dept_id_s", "Engineering", "text_t",engineering, "salary_i_dv", "1000", "dept_id_i", "0"))); - deptsDocs.add(add(doc("id",""+id++, "dept_id_s", "Marketing", "text_t","These guys make you look good","salary_i_dv", "1500", + deptsDocs.add(add(doc(deptIdField,""+id++, "dept_id_s", "Marketing", "text_t","These guys make you look good","salary_i_dv", "1500", "dept_id_i", "1"))); - deptsDocs.add(add(doc("id",""+id++, "dept_id_s", "Sales", "text_t","These guys sell stuff","salary_i_dv", "1600", + deptsDocs.add(add(doc(deptIdField,""+id++, "dept_id_s", "Sales", "text_t","These guys sell stuff","salary_i_dv", "1600", "dept_id_i", "2"))); - deptsDocs.add(add(doc("id",""+id++, "dept_id_s", "Support", "text_t","These guys help customers","salary_i_dv", "800", + deptsDocs.add(add(doc(deptIdField,""+id++, "dept_id_s", "Support", "text_t",support,"salary_i_dv", "800", "dept_id_i", "3"))); } From 5c8a70fb57fd877e336c3d90a98ae10c5a6906ae Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Fri, 23 Dec 2016 22:54:50 +0300 Subject: [PATCH 28/83] SOLR-9725: substitute properties in JdbcDataSource configuration --- solr/CHANGES.txt | 2 + .../handler/dataimport/JdbcDataSource.java | 4 +- .../dataimport/TestJdbcDataSource.java | 43 +++++++++++++------ 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a6ea07b6e22..55aeb93e7de 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -193,6 +193,8 @@ New Features * SOLR-9884: Add version to segments handler output (Steven Bower via Erick Erickson) +* SOLR-9725: Substitute properties into JdbcDataSource configuration ( Jamie Jackson, Yuri Sashevsky via Mikhail Khludnev) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java index ce8671a0f51..b17650af7dd 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java @@ -71,6 +71,7 @@ public class JdbcDataSource extends @Override public void init(Context context, Properties initProps) { + resolveVariables(context, initProps); initProps = decryptPwd(context, initProps); Object o = initProps.get(CONVERT_TYPE); if (o != null) @@ -113,7 +114,7 @@ public class JdbcDataSource extends } private Properties decryptPwd(Context context, Properties initProps) { - String encryptionKey = context.replaceTokens(initProps.getProperty("encryptKeyFile")); + String encryptionKey = initProps.getProperty("encryptKeyFile"); if (initProps.getProperty("password") != null && encryptionKey != null) { // this means the password is encrypted and use the file to decode it try { @@ -143,7 +144,6 @@ public class JdbcDataSource extends protected Callable createConnectionFactory(final Context context, final Properties initProps) { // final VariableResolver resolver = context.getVariableResolver(); - resolveVariables(context, initProps); final String jndiName = initProps.getProperty(JNDI_NAME); final String url = initProps.getProperty(URL); final String driver = 
initProps.getProperty(DRIVER); diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java index 7853ad154f6..01340bc4c43 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java @@ -80,6 +80,7 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { driver = mockControl.createMock(Driver.class); dataSource = mockControl.createMock(DataSource.class); connection = mockControl.createMock(Connection.class); + props.clear(); } @Override @@ -139,20 +140,33 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { } @Test - public void testRetrieveFromJndiWithCredentialsWithEncryptedPwd() throws Exception { + public void testRetrieveFromJndiWithCredentialsEncryptedAndResolved() throws Exception { MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource); - + + String user = "Fred"; + String plainPassword = "MyPassword"; + String encryptedPassword = "U2FsdGVkX18QMjY0yfCqlfBMvAB4d3XkwY96L7gfO2o="; + String propsNamespace = "exampleNamespace"; + props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB"); - props.put("user", "Fred"); - props.put("encryptKeyFile", createEncryptionKeyFile()); - props.put("password", "U2FsdGVkX18QMjY0yfCqlfBMvAB4d3XkwY96L7gfO2o="); - props.put("holdability", "HOLD_CURSORS_OVER_COMMIT"); - EasyMock.expect(dataSource.getConnection("Fred", "MyPassword")).andReturn( - connection); + + props.put("user", "${" +propsNamespace +".user}"); + props.put("encryptKeyFile", "${" +propsNamespace +".encryptKeyFile}"); + props.put("password", "${" +propsNamespace +".password}"); + + EasyMock.expect(dataSource.getConnection(user, plainPassword)).andReturn( + connection); + + Map values = new HashMap<>(); + values.put("user", user); + values.put("encryptKeyFile", createEncryptionKeyFile()); + values.put("password", encryptedPassword); + context.getVariableResolver().addNamespace(propsNamespace, values); + jdbcDataSource.init(context, props); connection.setAutoCommit(false); - connection.setHoldability(1); + //connection.setHoldability(1); mockControl.replay(); @@ -167,10 +181,11 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { public void testRetrieveFromJndiWithCredentialsWithEncryptedAndResolvedPwd() throws Exception { MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource); - props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB"); - props.put("user", "Fred"); - props.put("encryptKeyFile", "${foo.bar}"); - props.put("password", "U2FsdGVkX18QMjY0yfCqlfBMvAB4d3XkwY96L7gfO2o="); + Properties properties = new Properties(); + properties.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB"); + properties.put("user", "Fred"); + properties.put("encryptKeyFile", "${foo.bar}"); + properties.put("password", "U2FsdGVkX18QMjY0yfCqlfBMvAB4d3XkwY96L7gfO2o="); EasyMock.expect(dataSource.getConnection("Fred", "MyPassword")).andReturn( connection); @@ -178,7 +193,7 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { values.put("bar", createEncryptionKeyFile()); context.getVariableResolver().addNamespace("foo", values); - jdbcDataSource.init(context, props); + jdbcDataSource.init(context, properties); connection.setAutoCommit(false); From 
ba47f530d1165d4518569422472bc9e4f1c04b26 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 26 Dec 2016 10:10:03 +0100 Subject: [PATCH 29/83] LUCENE-7401: Make sure BKD trees index all dimensions. --- lucene/CHANGES.txt | 3 ++ .../org/apache/lucene/util/bkd/BKDWriter.java | 52 ++++++++++++++++--- .../org/apache/lucene/util/bkd/TestBKD.java | 48 +++++++++++++++-- .../org/apache/lucene/index/RandomCodec.java | 2 +- 4 files changed, 92 insertions(+), 13 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0099f9771d5..255867d68c3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -172,6 +172,9 @@ Improvements is no longer needed. Support for older Java 9 builds was removed. (Uwe Schindler) +* LUCENE-7401: Changed the way BKD trees pick the split dimension in order to + ensure all dimensions are indexed. (Adrien Grand) + Optimizations * LUCENE-7568: Optimize merging when index sorting is used but the diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index bf360d3502a..5e391f4e281 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -479,9 +479,12 @@ public class BKDWriter implements Closeable { docsSeen.set(values.getDocID(i)); } + final int[] parentSplits = new int[numDims]; build(1, numLeaves, values, 0, Math.toIntExact(pointCount), out, - minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, + minPackedValue, maxPackedValue, parentSplits, + splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]); + assert Arrays.equals(parentSplits, new int[numDims]); long indexFP = out.getFilePointer(); writeIndex(out, leafBlockFPs, splitPackedValues); @@ -1001,12 +1004,15 @@ public class BKDWriter implements Closeable { heapPointWriter = null; } + final int[] parentSplits = new int[numDims]; build(1, numLeaves, sortedPointWriters, ordBitSet, out, minPackedValue, maxPackedValue, + parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically); + assert Arrays.equals(parentSplits, new int[numDims]); for(PathSlice slice : sortedPointWriters) { slice.writer.destroy(); @@ -1413,7 +1419,29 @@ public class BKDWriter implements Closeable { return true; } - protected int split(byte[] minPackedValue, byte[] maxPackedValue) { + /** + * Pick the next dimension to split. + * @param minPackedValue the min values for all dimensions + * @param maxPackedValue the max values for all dimensions + * @param parentSplits how many times each dim has been split on the parent levels + * @return the dimension to split + */ + protected int split(byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits) { + // First look at whether there is a dimension that has split less than 2x less than + // the dim that has most splits, and return it if there is such a dimension and it + // does not only have equals values. This helps ensure all dimensions are indexed. 
+ int maxNumSplits = 0; + for (int numSplits : parentSplits) { + maxNumSplits = Math.max(maxNumSplits, numSplits); + } + for (int dim = 0; dim < numDims; ++dim) { + final int offset = dim * bytesPerDim; + if (parentSplits[dim] < maxNumSplits / 2 && + StringHelper.compare(bytesPerDim, minPackedValue, offset, maxPackedValue, offset) != 0) { + return dim; + } + } + // Find which dim has the largest span so we can split on it: int splitDim = -1; for(int dim=0;dim>> 1; int commonPrefixLen = bytesPerDim; @@ -1575,10 +1604,14 @@ public class BKDWriter implements Closeable { maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim); // recurse + parentSplits[splitDim]++; build(nodeID * 2, leafNodeOffset, reader, from, mid, out, - minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds); + minPackedValue, maxSplitPackedValue, parentSplits, + splitPackedValues, leafBlockFPs, spareDocIds); build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, - minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds); + minSplitPackedValue, maxPackedValue, parentSplits, + splitPackedValues, leafBlockFPs, spareDocIds); + parentSplits[splitDim]--; } } @@ -1589,6 +1622,7 @@ public class BKDWriter implements Closeable { LongBitSet ordBitSet, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, + int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, List toCloseHeroically) throws IOException { @@ -1699,7 +1733,7 @@ public class BKDWriter implements Closeable { int splitDim; if (numDims > 1) { - splitDim = split(minPackedValue, maxPackedValue); + splitDim = split(minPackedValue, maxPackedValue, parentSplits); } else { splitDim = 0; } @@ -1767,10 +1801,11 @@ public class BKDWriter implements Closeable { } } + parentSplits[splitDim]++; // Recurse on left tree: build(2*nodeID, leafNodeOffset, leftSlices, ordBitSet, out, - minPackedValue, maxSplitPackedValue, + minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically); for(int dim=0;dim Date: Mon, 26 Dec 2016 20:15:06 +0100 Subject: [PATCH 30/83] LUCENE-7604: Disable test on Java 9 --- .../src/test/org/apache/lucene/search/TestLRUQueryCache.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 460f4a7d342..87382f98387 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -51,6 +51,7 @@ import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.RamUsageTester; @@ -378,6 +379,7 @@ public class TestLRUQueryCache extends LuceneTestCase { // by the cache itself, not cache entries, and we want to make sure that // memory usage is not grossly underestimated. 
public void testRamBytesUsedConstantEntryOverhead() throws IOException { + LuceneTestCase.assumeFalse("RamUsageTester does not fully work on Java 9", Constants.JRE_IS_MINIMUM_JAVA9); final LRUQueryCache queryCache = new LRUQueryCache(1000000, 10000000, context -> true); final RamUsageTester.Accumulator acc = new RamUsageTester.Accumulator() { @@ -958,6 +960,8 @@ public class TestLRUQueryCache extends LuceneTestCase { } public void testDetectMutatedQueries() throws IOException { + LuceneTestCase.assumeFalse("LUCENE-7604: For some unknown reason the non-constant BadQuery#hashCode() does not trigger ConcurrentModificationException on Java 9 b150", + Constants.JRE_IS_MINIMUM_JAVA9); Directory dir = newDirectory(); final RandomIndexWriter w = new RandomIndexWriter(random(), dir); w.addDocument(new Document()); From 56476fb8c70486c21324879f1bd1efc79df92ffb Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Tue, 27 Dec 2016 11:37:50 +0100 Subject: [PATCH 31/83] SOLR-9805 Fix assertion error on Windows where SystemLoadAverage is reported as -1. Don't expose non-existent attributes. --- .../solr/metrics/OperatingSystemMetricSet.java | 16 ++++++++++++++-- .../org/apache/solr/metrics/JvmMetricsTest.java | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java b/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java index b26386a2a1d..34ef5d1c2e6 100644 --- a/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java +++ b/solr/core/src/java/org/apache/solr/metrics/OperatingSystemMetricSet.java @@ -17,11 +17,15 @@ package org.apache.solr.metrics; import javax.management.JMException; +import javax.management.MBeanAttributeInfo; +import javax.management.MBeanInfo; import javax.management.MBeanServer; import javax.management.ObjectName; import java.lang.invoke.MethodHandles; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import com.codahale.metrics.JmxAttributeGauge; import com.codahale.metrics.Metric; @@ -67,9 +71,17 @@ public class OperatingSystemMetricSet implements MetricSet { try { final ObjectName on = new ObjectName("java.lang:type=OperatingSystem"); // verify that it exists - mBeanServer.getMBeanInfo(on); + MBeanInfo info = mBeanServer.getMBeanInfo(on); + // collect valid attributes + Set attributes = new HashSet<>(); + for (MBeanAttributeInfo ai : info.getAttributes()) { + attributes.add(ai.getName()); + } for (String metric : METRICS) { - metrics.put(metric, new JmxAttributeGauge(mBeanServer, on, metric)); + // verify that an attribute exists before attempting to add it + if (attributes.contains(metric)) { + metrics.put(metric, new JmxAttributeGauge(mBeanServer, on, metric)); + } } } catch (JMException ignored) { log.debug("Unable to load OperatingSystem MBean", ignored); diff --git a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java index 6d597cb0056..72adc686354 100644 --- a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java @@ -48,7 +48,8 @@ public class JvmMetricsTest extends SolrJettyTestBase { continue; } double value = ((Number)gauge.getValue()).doubleValue(); - assertTrue(value >= 0); + // SystemLoadAverage on Windows may be -1.0 + assertTrue("unexpected value of " + metric + ": " + value, value >= 0 || value == -1.0); } } From 
254473bf33ee7ce33a47c9229396902e812736e5 Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Tue, 27 Dec 2016 19:12:24 +0530 Subject: [PATCH 32/83] SOLR-9877: Use instrumented http client and connection pool --- solr/CHANGES.txt | 2 + .../org/apache/solr/core/CoreContainer.java | 8 +- .../component/HttpShardHandlerFactory.java | 65 ++++++++- .../solr/update/UpdateShardHandler.java | 65 ++++++++- .../InstrumentedHttpRequestExecutor.java | 125 ++++++++++++++++++ ...tedPoolingHttpClientConnectionManager.java | 113 ++++++++++++++++ .../client/solrj/impl/HttpClientUtil.java | 35 +++-- 7 files changed, 388 insertions(+), 25 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java create mode 100644 solr/core/src/java/org/apache/solr/util/stats/InstrumentedPoolingHttpClientConnectionManager.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 55aeb93e7de..fa8da6e3cef 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -195,6 +195,8 @@ New Features * SOLR-9725: Substitute properties into JdbcDataSource configuration ( Jamie Jackson, Yuri Sashevsky via Mikhail Khludnev) +* SOLR-9877: Use instrumented http client and connection pool. (shalin) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index 6e640bcf9c9..f3747dcb9f6 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -460,10 +460,16 @@ public class CoreContainer { } } + metricManager = new SolrMetricManager(); shardHandlerFactory = ShardHandlerFactory.newInstance(cfg.getShardHandlerFactoryPluginInfo(), loader); + if (shardHandlerFactory instanceof SolrMetricProducer) { + SolrMetricProducer metricProducer = (SolrMetricProducer) shardHandlerFactory; + metricProducer.initializeMetrics(metricManager, SolrInfoMBean.Group.http.toString(), "httpShardHandler"); + } updateShardHandler = new UpdateShardHandler(cfg.getUpdateShardHandlerConfig()); + updateShardHandler.initializeMetrics(metricManager, SolrInfoMBean.Group.http.toString(), "updateShardHandler"); solrCores.allocateLazyCores(cfg.getTransientCacheSize(), loader); @@ -476,8 +482,6 @@ public class CoreContainer { MDCLoggingContext.setNode(this); - metricManager = new SolrMetricManager(); - securityConfHandler = isZooKeeperAware() ? 
new SecurityConfHandlerZk(this) : new SecurityConfHandlerLocal(this); reloadSecurityProperties(); this.backupRepoFactory = new BackupRepositoryFactory(cfg.getBackupRepositoryPlugins()); diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index e910443ea47..3c01720c6bb 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -35,14 +35,21 @@ import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.URLUtil; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.PluginInfo; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricProducer; import org.apache.solr.update.UpdateShardHandlerConfig; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.util.stats.InstrumentedHttpRequestExecutor; +import org.apache.solr.util.stats.InstrumentedPoolingHttpClientConnectionManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; import java.util.Arrays; import java.util.Comparator; import java.util.List; @@ -56,7 +63,7 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.apache.solr.util.plugin.PluginInfoInitialized { +public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.apache.solr.util.plugin.PluginInfoInitialized, SolrMetricProducer { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final String DEFAULT_SCHEME = "http"; @@ -74,7 +81,9 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. new DefaultSolrThreadFactory("httpShardExecutor") ); + protected InstrumentedPoolingHttpClientConnectionManager clientConnectionManager; protected CloseableHttpClient defaultClient; + protected InstrumentedHttpRequestExecutor httpRequestExecutor; private LBHttpSolrClient loadbalancer; //default values: int soTimeout = UpdateShardHandlerConfig.DEFAULT_DISTRIBUPDATESOTIMEOUT; @@ -169,12 +178,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. ); ModifiableSolrParams clientParams = getClientParams(); - - this.defaultClient = HttpClientUtil.createClient(clientParams); - + httpRequestExecutor = new InstrumentedHttpRequestExecutor(); + clientConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry()); + this.defaultClient = HttpClientUtil.createClient(clientParams, clientConnectionManager, false, httpRequestExecutor); this.loadbalancer = createLoadbalancer(defaultClient); } - + protected ModifiableSolrParams getClientParams() { ModifiableSolrParams clientParams = new ModifiableSolrParams(); clientParams.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, maxConnectionsPerHost); @@ -219,6 +228,9 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. 
if (defaultClient != null) { HttpClientUtil.close(defaultClient); } + if (clientConnectionManager != null) { + clientConnectionManager.close(); + } } } } @@ -350,4 +362,47 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. return url; } + + @Override + public String getName() { + return this.getClass().getName(); + } + + @Override + public String getVersion() { + return getClass().getPackage().getSpecificationVersion(); + } + + @Override + public Collection initializeMetrics(SolrMetricManager manager, String registry, String scope) { + List metricNames = new ArrayList<>(4); + metricNames.addAll(clientConnectionManager.initializeMetrics(manager, registry, scope)); + metricNames.addAll(httpRequestExecutor.initializeMetrics(manager, registry, scope)); + return metricNames; + } + + @Override + public String getDescription() { + return "Metrics tracked by HttpShardHandlerFactory for distributed query requests"; + } + + @Override + public Category getCategory() { + return Category.OTHER; + } + + @Override + public String getSource() { + return null; + } + + @Override + public URL[] getDocs() { + return new URL[0]; + } + + @Override + public NamedList getStatistics() { + return null; + } } diff --git a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java index 35096e53224..c3ed8cd14e7 100644 --- a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java +++ b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java @@ -17,6 +17,10 @@ package org.apache.solr.update; import java.lang.invoke.MethodHandles; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; import java.util.concurrent.ExecutorService; import org.apache.http.client.HttpClient; @@ -27,11 +31,16 @@ import org.apache.solr.cloud.RecoveryStrategy; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.ExecutorUtil; +import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SolrjNamedThreadFactory; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricProducer; +import org.apache.solr.util.stats.InstrumentedHttpRequestExecutor; +import org.apache.solr.util.stats.InstrumentedPoolingHttpClientConnectionManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class UpdateShardHandler { +public class UpdateShardHandler implements SolrMetricProducer { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -50,10 +59,12 @@ public class UpdateShardHandler { private final CloseableHttpClient client; - private final PoolingHttpClientConnectionManager clientConnectionManager; + private final InstrumentedPoolingHttpClientConnectionManager clientConnectionManager; + + private final InstrumentedHttpRequestExecutor httpRequestExecutor; public UpdateShardHandler(UpdateShardHandlerConfig cfg) { - clientConnectionManager = new PoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry()); + clientConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry()); if (cfg != null ) { clientConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections()); clientConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost()); @@ -64,7 +75,8 @@ public class 
UpdateShardHandler { clientParams.set(HttpClientUtil.PROP_SO_TIMEOUT, cfg.getDistributedSocketTimeout()); clientParams.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, cfg.getDistributedConnectionTimeout()); } - client = HttpClientUtil.createClient(clientParams, clientConnectionManager); + httpRequestExecutor = new InstrumentedHttpRequestExecutor(); + client = HttpClientUtil.createClient(clientParams, clientConnectionManager, false, httpRequestExecutor); // following is done only for logging complete configuration. // The maxConnections and maxConnectionsPerHost have already been specified on the connection manager @@ -74,7 +86,50 @@ public class UpdateShardHandler { } log.debug("Created UpdateShardHandler HTTP client with params: {}", clientParams); } - + + @Override + public String getName() { + return this.getClass().getName(); + } + + @Override + public String getVersion() { + return getClass().getPackage().getSpecificationVersion(); + } + + @Override + public Collection initializeMetrics(SolrMetricManager manager, String registry, String scope) { + List metricNames = new ArrayList<>(4); + metricNames.addAll(clientConnectionManager.initializeMetrics(manager, registry, scope)); + metricNames.addAll(httpRequestExecutor.initializeMetrics(manager, registry, scope)); + return metricNames; + } + + @Override + public String getDescription() { + return "Metrics tracked by UpdateShardHandler for "; + } + + @Override + public Category getCategory() { + return null; + } + + @Override + public String getSource() { + return null; + } + + @Override + public URL[] getDocs() { + return new URL[0]; + } + + @Override + public NamedList getStatistics() { + return null; + } + public HttpClient getHttpClient() { return client; } diff --git a/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java new file mode 100644 index 00000000000..946a822d15a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.util.stats; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Collection; +import java.util.Collections; +import java.util.Locale; + +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import org.apache.http.HttpClientConnection; +import org.apache.http.HttpException; +import org.apache.http.HttpRequest; +import org.apache.http.HttpResponse; +import org.apache.http.RequestLine; +import org.apache.http.client.methods.HttpRequestWrapper; +import org.apache.http.client.utils.URIBuilder; +import org.apache.http.protocol.HttpContext; +import org.apache.http.protocol.HttpRequestExecutor; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricProducer; + +/** + * Sub-class of HttpRequestExecutor which tracks metrics interesting to solr + * Inspired and partially copied from dropwizard httpclient library + */ +public class InstrumentedHttpRequestExecutor extends HttpRequestExecutor implements SolrMetricProducer { + protected MetricRegistry metricsRegistry; + protected String scope; + + private static String methodNameString(HttpRequest request) { + return request.getRequestLine().getMethod().toLowerCase(Locale.ROOT) + "-requests"; + } + + @Override + public HttpResponse execute(HttpRequest request, HttpClientConnection conn, HttpContext context) throws IOException, HttpException { + assert metricsRegistry != null; + final Timer.Context timerContext = timer(request).time(); + try { + return super.execute(request, conn, context); + } finally { + timerContext.stop(); + } + } + + private Timer timer(HttpRequest request) { + return metricsRegistry.timer(getNameFor(request)); + } + + @Override + public String getName() { + return this.getClass().getName(); + } + + @Override + public String getVersion() { + return getClass().getPackage().getSpecificationVersion(); + } + + @Override + public Collection initializeMetrics(SolrMetricManager manager, String registry, String scope) { + this.metricsRegistry = manager.registry(registry); + this.scope = scope; + return Collections.emptyList(); // we do not know the names of the metrics yet + } + + @Override + public String getDescription() { + return null; + } + + @Override + public Category getCategory() { + return Category.OTHER; + } + + @Override + public String getSource() { + return null; + } + + @Override + public URL[] getDocs() { + return null; + } + + @Override + public NamedList getStatistics() { + return null; + } + + private String getNameFor(HttpRequest request) { + try { + final RequestLine requestLine = request.getRequestLine(); + String schemeHostPort = null; + if (request instanceof HttpRequestWrapper) { + HttpRequestWrapper wrapper = (HttpRequestWrapper) request; + schemeHostPort = wrapper.getTarget().getSchemeName() + "://" + wrapper.getTarget().getHostName() + ":" + wrapper.getTarget().getPort(); + } + final URIBuilder url = new URIBuilder(requestLine.getUri()); + return SolrMetricManager.mkName((schemeHostPort != null ? schemeHostPort : "") + url.removeQuery().build().toString() + "." 
+ methodNameString(request), scope); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/util/stats/InstrumentedPoolingHttpClientConnectionManager.java b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedPoolingHttpClientConnectionManager.java new file mode 100644 index 00000000000..08b68cb3802 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedPoolingHttpClientConnectionManager.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.stats; + +import java.net.URL; +import java.util.Arrays; +import java.util.Collection; + +import com.codahale.metrics.Gauge; +import com.codahale.metrics.MetricRegistry; +import org.apache.http.config.Registry; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricProducer; + +/** + * Sub-class of PoolingHttpClientConnectionManager which tracks metrics interesting to Solr. + * Inspired by dropwizard metrics-httpclient library implementation. 
+ */ +public class InstrumentedPoolingHttpClientConnectionManager extends PoolingHttpClientConnectionManager implements SolrMetricProducer { + + protected MetricRegistry metricsRegistry; + + public InstrumentedPoolingHttpClientConnectionManager(Registry socketFactoryRegistry) { + super(socketFactoryRegistry); + } + + public MetricRegistry getMetricsRegistry() { + return metricsRegistry; + } + + public void setMetricsRegistry(MetricRegistry metricRegistry) { + this.metricsRegistry = metricRegistry; + } + + @Override + public String getName() { + return this.getClass().getName(); + } + + @Override + public String getVersion() { + return getClass().getPackage().getSpecificationVersion(); + } + + @Override + public Collection initializeMetrics(SolrMetricManager manager, String registry, String scope) { + this.metricsRegistry = manager.registry(registry); + metricsRegistry.register(SolrMetricManager.mkName("availableConnections", scope), + (Gauge) () -> { + // this acquires a lock on the connection pool; remove if contention sucks + return getTotalStats().getAvailable(); + }); + metricsRegistry.register(SolrMetricManager.mkName("leasedConnections", scope), + (Gauge) () -> { + // this acquires a lock on the connection pool; remove if contention sucks + return getTotalStats().getLeased(); + }); + metricsRegistry.register(SolrMetricManager.mkName("maxConnections", scope), + (Gauge) () -> { + // this acquires a lock on the connection pool; remove if contention sucks + return getTotalStats().getMax(); + }); + metricsRegistry.register(SolrMetricManager.mkName("pendingConnections", scope), + (Gauge) () -> { + // this acquires a lock on the connection pool; remove if contention sucks + return getTotalStats().getPending(); + }); + return Arrays.asList("availableConnections", "leasedConnections", "maxConnections", "pendingConnections"); + } + + @Override + public String getDescription() { + return ""; + } + + @Override + public Category getCategory() { + return Category.OTHER; + } + + @Override + public String getSource() { + return null; + } + + @Override + public URL[] getDocs() { + return null; + } + + @Override + public NamedList getStatistics() { + return null; + } +} diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java index d4dea17af24..decd5e8efda 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java @@ -55,6 +55,7 @@ import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.HttpContext; +import org.apache.http.protocol.HttpRequestExecutor; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ObjectReleaseTracker; @@ -213,22 +214,18 @@ public class HttpClientUtil { return createClient(params, cm, false); } - - /** - * Creates new http client by using the provided configuration. 
- * - */ - public static CloseableHttpClient createClient(final SolrParams params, PoolingHttpClientConnectionManager cm, boolean sharedConnectionManager) { + + public static CloseableHttpClient createClient(final SolrParams params, PoolingHttpClientConnectionManager cm, boolean sharedConnectionManager, HttpRequestExecutor httpRequestExecutor) { final ModifiableSolrParams config = new ModifiableSolrParams(params); if (logger.isDebugEnabled()) { logger.debug("Creating new http client, config:" + config); } - + cm.setMaxTotal(params.getInt(HttpClientUtil.PROP_MAX_CONNECTIONS, 10000)); cm.setDefaultMaxPerRoute(params.getInt(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 10000)); cm.setValidateAfterInactivity(Integer.getInteger(VALIDATE_AFTER_INACTIVITY, VALIDATE_AFTER_INACTIVITY_DEFAULT)); - + HttpClientBuilder newHttpClientBuilder = HttpClientBuilder.create(); if (sharedConnectionManager) { @@ -236,7 +233,7 @@ public class HttpClientUtil { } else { newHttpClientBuilder.setConnectionManagerShared(false); } - + ConnectionKeepAliveStrategy keepAliveStrat = new ConnectionKeepAliveStrategy() { @Override public long getKeepAliveDuration(HttpResponse response, HttpContext context) { @@ -256,18 +253,30 @@ public class HttpClientUtil { } newHttpClientBuilder.addInterceptorLast(new DynamicInterceptor()); - + newHttpClientBuilder = newHttpClientBuilder.setKeepAliveStrategy(keepAliveStrat) .evictIdleConnections((long) Integer.getInteger(EVICT_IDLE_CONNECTIONS, EVICT_IDLE_CONNECTIONS_DEFAULT), TimeUnit.MILLISECONDS); - + + if (httpRequestExecutor != null) { + newHttpClientBuilder.setRequestExecutor(httpRequestExecutor); + } + HttpClientBuilder builder = setupBuilder(newHttpClientBuilder, params); - + HttpClient httpClient = builder.setConnectionManager(cm).build(); - + assert ObjectReleaseTracker.track(httpClient); return (CloseableHttpClient) httpClient; } + /** + * Creates new http client by using the provided configuration. + * + */ + public static CloseableHttpClient createClient(final SolrParams params, PoolingHttpClientConnectionManager cm, boolean sharedConnectionManager) { + return createClient(params, cm, sharedConnectionManager, null); + } + private static HttpClientBuilder setupBuilder(HttpClientBuilder builder, SolrParams config) { Builder requestConfigBuilder = RequestConfig.custom() From 9d606d8ff058b5a66b53b9255317c139c53effc7 Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Tue, 27 Dec 2016 19:52:06 +0100 Subject: [PATCH 33/83] SOLR-9880: Add Ganglia, Graphite and SLF4J metrics reporters. 
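Note: all three reporters added in this patch are thin wrappers around the corresponding Dropwizard Metrics reporters. Each one reads its plugin init arguments (host, port, period, prefix, filter, and so on) in validate(), builds the underlying Dropwizard reporter via its builder, and starts it on a fixed period; close() stops it. As a minimal, illustrative sketch of the Dropwizard call pattern being wrapped (this is not code from the patch; the registry contents, the "solr.node" logger name and the 60-second period are assumptions made only for the example), the SLF4J case boils down to:

    import java.util.concurrent.TimeUnit;

    import com.codahale.metrics.MetricFilter;
    import com.codahale.metrics.MetricRegistry;
    import com.codahale.metrics.Slf4jReporter;
    import org.slf4j.LoggerFactory;

    public class Slf4jReporterSketch {
      public static void main(String[] args) throws InterruptedException {
        // Stand-in for a registry normally managed by SolrMetricManager.
        MetricRegistry registry = new MetricRegistry();
        registry.counter("cores.requests").inc();                // hypothetical metric, for illustration only

        Slf4jReporter reporter = Slf4jReporter.forRegistry(registry)
            .outputTo(LoggerFactory.getLogger("solr.node"))      // 'logger' init arg, or derived from the metrics group
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.MILLISECONDS)
            .filter(MetricFilter.ALL)                            // a 'filter' init arg becomes a prefix-based filter
            .build();
        reporter.start(60, TimeUnit.SECONDS);                    // 'period' init arg, in seconds

        Thread.sleep(61_000);                                    // let one report fire before shutting down
        reporter.stop();
      }
    }

In the patch itself the reporters are not built by hand like this: they are instantiated from reporter plugin configuration in solr.xml (see the solr-gangliareporter.xml, solr-graphitereporter.xml and solr-slf4jreporter.xml test configs added below), with validate() doing the builder work sketched above.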
--- lucene/ivy-versions.properties | 3 + solr/CHANGES.txt | 2 + .../reporters/SolrGangliaReporter.java | 144 +++++++++++++ .../reporters/SolrGraphiteReporter.java | 129 +++++++++++ .../metrics/reporters/SolrSlf4jReporter.java | 127 +++++++++++ .../test-files/solr/solr-gangliareporter.xml | 32 +++ .../test-files/solr/solr-graphitereporter.xml | 31 +++ .../test-files/solr/solr-slf4jreporter.xml | 35 +++ .../reporters/SolrGangliaReporterTest.java | 81 +++++++ .../reporters/SolrGraphiteReporterTest.java | 115 ++++++++++ .../reporters/SolrSlf4jReporterTest.java | 77 +++++++ solr/licenses/gmetric4j-1.0.7.jar.sha1 | 1 + solr/licenses/gmetric4j-LICENSE-BSD.txt | 31 +++ solr/licenses/gmetric4j-NOTICE.txt | 0 solr/licenses/metrics-ganglia-3.1.2.jar.sha1 | 1 + solr/licenses/metrics-ganglia-LICENSE-ASL.txt | 203 ++++++++++++++++++ solr/licenses/metrics-ganglia-NOTICE.txt | 12 ++ solr/licenses/metrics-graphite-3.1.2.jar.sha1 | 1 + .../licenses/metrics-graphite-LICENSE-ASL.txt | 203 ++++++++++++++++++ solr/licenses/metrics-graphite-NOTICE.txt | 12 ++ solr/server/ivy.xml | 3 + 21 files changed, 1243 insertions(+) create mode 100644 solr/core/src/java/org/apache/solr/metrics/reporters/SolrGangliaReporter.java create mode 100644 solr/core/src/java/org/apache/solr/metrics/reporters/SolrGraphiteReporter.java create mode 100644 solr/core/src/java/org/apache/solr/metrics/reporters/SolrSlf4jReporter.java create mode 100644 solr/core/src/test-files/solr/solr-gangliareporter.xml create mode 100644 solr/core/src/test-files/solr/solr-graphitereporter.xml create mode 100644 solr/core/src/test-files/solr/solr-slf4jreporter.xml create mode 100644 solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java create mode 100644 solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java create mode 100644 solr/core/src/test/org/apache/solr/metrics/reporters/SolrSlf4jReporterTest.java create mode 100644 solr/licenses/gmetric4j-1.0.7.jar.sha1 create mode 100644 solr/licenses/gmetric4j-LICENSE-BSD.txt create mode 100644 solr/licenses/gmetric4j-NOTICE.txt create mode 100644 solr/licenses/metrics-ganglia-3.1.2.jar.sha1 create mode 100644 solr/licenses/metrics-ganglia-LICENSE-ASL.txt create mode 100644 solr/licenses/metrics-ganglia-NOTICE.txt create mode 100644 solr/licenses/metrics-graphite-3.1.2.jar.sha1 create mode 100644 solr/licenses/metrics-graphite-LICENSE-ASL.txt create mode 100644 solr/licenses/metrics-graphite-NOTICE.txt diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 2f44f7e47bd..297d46c0123 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -71,10 +71,13 @@ com.sun.jersey.version = 1.9 /de.l3s.boilerpipe/boilerpipe = 1.1.0 /dom4j/dom4j = 1.6.1 /hsqldb/hsqldb = 1.8.0.10 +/info.ganglia.gmetric4j/gmetric4j = 1.0.7 /io.airlift/slice = 0.10 io.dropwizard.metrics.version = 3.1.2 /io.dropwizard.metrics/metrics-core = ${io.dropwizard.metrics.version} +/io.dropwizard.metrics/metrics-ganglia = ${io.dropwizard.metrics.version} +/io.dropwizard.metrics/metrics-graphite = ${io.dropwizard.metrics.version} /io.dropwizard.metrics/metrics-healthchecks = ${io.dropwizard.metrics.version} /io.dropwizard.metrics/metrics-jetty9 = ${io.dropwizard.metrics.version} /io.dropwizard.metrics/metrics-jvm = ${io.dropwizard.metrics.version} diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index fa8da6e3cef..f587109a48a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -197,6 +197,8 @@ New Features * SOLR-9877: Use instrumented http 
client and connection pool. (shalin) +* SOLR-9880: Add Ganglia, Graphite and SLF4J metrics reporters. (ab) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/metrics/reporters/SolrGangliaReporter.java b/solr/core/src/java/org/apache/solr/metrics/reporters/SolrGangliaReporter.java new file mode 100644 index 00000000000..45561e58b58 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/metrics/reporters/SolrGangliaReporter.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.metrics.reporters; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import com.codahale.metrics.MetricFilter; +import com.codahale.metrics.ganglia.GangliaReporter; +import info.ganglia.gmetric4j.gmetric.GMetric; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricReporter; + +/** + * + */ +public class SolrGangliaReporter extends SolrMetricReporter { + + private String host = null; + private int port = -1; + private boolean multicast; + private int period = 60; + private String instancePrefix = null; + private String filterPrefix = null; + private boolean testing; + private GangliaReporter reporter; + + // for unit tests + GMetric ganglia = null; + + /** + * Create a Ganglia reporter for metrics managed in a named registry. + * + * @param metricManager metric manager instance that manages the selected registry + * @param registryName registry to use, one of registries managed by + * {@link SolrMetricManager} + */ + public SolrGangliaReporter(SolrMetricManager metricManager, String registryName) { + super(metricManager, registryName); + } + + public void setHost(String host) { + this.host = host; + } + + public void setPort(int port) { + this.port = port; + } + + public void setPrefix(String prefix) { + this.instancePrefix = prefix; + } + + public void setFilter(String filter) { + this.filterPrefix = filter; + } + + + public void setPeriod(int period) { + this.period = period; + } + + public void setMulticast(boolean multicast) { + this.multicast = multicast; + } + + // only for unit tests! 
+ public void setTesting(boolean testing) { + this.testing = testing; + } + + void setGMetric(GMetric ganglia) { + this.ganglia = ganglia; + } + + @Override + protected void validate() throws IllegalStateException { + if (host == null) { + throw new IllegalStateException("Init argument 'host' must be set to a valid Ganglia server name."); + } + if (port == -1) { + throw new IllegalStateException("Init argument 'port' must be set to a valid Ganglia server port."); + } + if (period < 1) { + throw new IllegalStateException("Init argument 'period' is in time unit 'seconds' and must be at least 1."); + } + if (!testing) { + start(); + } + } + + //this is a separate method for unit tests + void start() { + if (!testing) { + try { + ganglia = new GMetric(host, port, + multicast ? GMetric.UDPAddressingMode.MULTICAST : GMetric.UDPAddressingMode.UNICAST, + 1); + } catch (IOException ioe) { + throw new IllegalStateException("Exception connecting to Ganglia", ioe); + } + } + if (instancePrefix == null) { + instancePrefix = registryName; + } else { + instancePrefix = instancePrefix + "." + registryName; + } + GangliaReporter.Builder builder = GangliaReporter + .forRegistry(metricManager.registry(registryName)) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .prefixedWith(instancePrefix); + MetricFilter filter; + if (filterPrefix != null) { + filter = new SolrMetricManager.PrefixFilter(filterPrefix); + } else { + filter = MetricFilter.ALL; + } + builder = builder.filter(filter); + reporter = builder.build(ganglia); + reporter.start(period, TimeUnit.SECONDS); + } + + @Override + public void close() throws IOException { + if (reporter != null) { + reporter.close(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/metrics/reporters/SolrGraphiteReporter.java b/solr/core/src/java/org/apache/solr/metrics/reporters/SolrGraphiteReporter.java new file mode 100644 index 00000000000..8565ce86c05 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/metrics/reporters/SolrGraphiteReporter.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.metrics.reporters; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.concurrent.TimeUnit; + +import com.codahale.metrics.MetricFilter; +import com.codahale.metrics.graphite.Graphite; +import com.codahale.metrics.graphite.GraphiteReporter; +import com.codahale.metrics.graphite.GraphiteSender; +import com.codahale.metrics.graphite.PickledGraphite; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricReporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Metrics reporter that wraps {@link com.codahale.metrics.graphite.GraphiteReporter}. 
+ */ +public class SolrGraphiteReporter extends SolrMetricReporter { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private String host = null; + private int port = -1; + private int period = 60; + private boolean pickled = false; + private String instancePrefix = null; + private String filterPrefix = null; + private GraphiteReporter reporter = null; + + /** + * Create a Graphite reporter for metrics managed in a named registry. + * + * @param metricManager metric manager instance that manages the selected registry + * @param registryName registry to use, one of registries managed by + * {@link SolrMetricManager} + */ + public SolrGraphiteReporter(SolrMetricManager metricManager, String registryName) { + super(metricManager, registryName); + } + + public void setHost(String host) { + this.host = host; + } + + public void setPort(int port) { + this.port = port; + } + + public void setPrefix(String prefix) { + this.instancePrefix = prefix; + } + + public void setFilter(String filter) { + this.filterPrefix = filter; + } + + public void setPickled(boolean pickled) { + this.pickled = pickled; + } + + public void setPeriod(int period) { + this.period = period; + } + + @Override + protected void validate() throws IllegalStateException { + if (host == null) { + throw new IllegalStateException("Init argument 'host' must be set to a valid Graphite server name."); + } + if (port == -1) { + throw new IllegalStateException("Init argument 'port' must be set to a valid Graphite server port."); + } + if (reporter != null) { + throw new IllegalStateException("Already started once?"); + } + if (period < 1) { + throw new IllegalStateException("Init argument 'period' is in time unit 'seconds' and must be at least 1."); + } + final GraphiteSender graphite; + if (pickled) { + graphite = new PickledGraphite(host, port); + } else { + graphite = new Graphite(host, port); + } + if (instancePrefix == null) { + instancePrefix = registryName; + } else { + instancePrefix = instancePrefix + "." + registryName; + } + GraphiteReporter.Builder builder = GraphiteReporter + .forRegistry(metricManager.registry(registryName)) + .prefixedWith(instancePrefix) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS); + MetricFilter filter; + if (filterPrefix != null) { + filter = new SolrMetricManager.PrefixFilter(filterPrefix); + } else { + filter = MetricFilter.ALL; + } + builder = builder.filter(filter); + reporter = builder.build(graphite); + reporter.start(period, TimeUnit.SECONDS); + } + + @Override + public void close() throws IOException { + if (reporter != null) { + reporter.close(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/metrics/reporters/SolrSlf4jReporter.java b/solr/core/src/java/org/apache/solr/metrics/reporters/SolrSlf4jReporter.java new file mode 100644 index 00000000000..817dda17f94 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/metrics/reporters/SolrSlf4jReporter.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.metrics.reporters; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.concurrent.TimeUnit; + +import com.codahale.metrics.MetricFilter; +import com.codahale.metrics.Slf4jReporter; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricReporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Metrics reporter that wraps {@link com.codahale.metrics.Slf4jReporter}. + * The following init arguments are supported: + *
+ * <ul>
+ *   <li>period: (optional, int) number of seconds between reports, default is 60,</li>
+ *   <li>prefix: (optional, str) prefix for metric names, in addition to
+ *   registry name. Default is none, ie. just registry name.</li>
+ *   <li>filter: (optional, str) if not empty only metric names that start
+ *   with this value will be reported, default is all metrics from a registry,</li>
+ *   <li>logger: (optional, str) logger name to use. Default is the
+ *   metrics group, eg. solr.jvm</li>
+ * </ul>
    + */ +public class SolrSlf4jReporter extends SolrMetricReporter { + // we need this to pass validate-source-patterns + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private int period = 60; + private String instancePrefix = null; + private String logger = null; + private String filterPrefix = null; + private Slf4jReporter reporter; + + /** + * Create a SLF4J reporter for metrics managed in a named registry. + * + * @param metricManager metric manager instance that manages the selected registry + * @param registryName registry to use, one of registries managed by + * {@link SolrMetricManager} + */ + public SolrSlf4jReporter(SolrMetricManager metricManager, String registryName) { + super(metricManager, registryName); + } + + public void setPrefix(String prefix) { + this.instancePrefix = prefix; + } + + public void setFilter(String filter) { + this.filterPrefix = filter; + } + + public void setLogger(String logger) { + this.logger = logger; + } + + public void setPeriod(int period) { + this.period = period; + } + + @Override + protected void validate() throws IllegalStateException { + if (period < 1) { + throw new IllegalStateException("Init argument 'period' is in time unit 'seconds' and must be at least 1."); + } + if (instancePrefix == null) { + instancePrefix = registryName; + } else { + instancePrefix = instancePrefix + "." + registryName; + } + Slf4jReporter.Builder builder = Slf4jReporter + .forRegistry(metricManager.registry(registryName)) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS); + + MetricFilter filter; + if (filterPrefix != null) { + filter = new SolrMetricManager.PrefixFilter(filterPrefix); + } else { + filter = MetricFilter.ALL; + } + builder = builder.filter(filter); + if (logger == null || logger.isEmpty()) { + // construct logger name from Group + if (pluginInfo.attributes.containsKey("group")) { + logger = SolrMetricManager.overridableRegistryName(pluginInfo.attributes.get("group")); + } else if (pluginInfo.attributes.containsKey("registry")) { + String reg = SolrMetricManager.overridableRegistryName(pluginInfo.attributes.get("registry")); + String[] names = reg.split("\\."); + if (names.length < 2) { + logger = reg; + } else { + logger = names[0] + "." 
+ names[1]; + } + } + } + builder = builder.outputTo(LoggerFactory.getLogger(logger)); + reporter = builder.build(); + reporter.start(period, TimeUnit.SECONDS); + } + + @Override + public void close() throws IOException { + if (reporter != null) { + reporter.close(); + } + } +} diff --git a/solr/core/src/test-files/solr/solr-gangliareporter.xml b/solr/core/src/test-files/solr/solr-gangliareporter.xml new file mode 100644 index 00000000000..9e7233c9238 --- /dev/null +++ b/solr/core/src/test-files/solr/solr-gangliareporter.xml @@ -0,0 +1,32 @@ + + + + + + + localhost + 10000 + + 1 + test + cores + + true + + + diff --git a/solr/core/src/test-files/solr/solr-graphitereporter.xml b/solr/core/src/test-files/solr/solr-graphitereporter.xml new file mode 100644 index 00000000000..a0557df2e43 --- /dev/null +++ b/solr/core/src/test-files/solr/solr-graphitereporter.xml @@ -0,0 +1,31 @@ + + + + + + + localhost + ${mock-graphite-port} + + 1 + test + cores + false + + + diff --git a/solr/core/src/test-files/solr/solr-slf4jreporter.xml b/solr/core/src/test-files/solr/solr-slf4jreporter.xml new file mode 100644 index 00000000000..1a084161012 --- /dev/null +++ b/solr/core/src/test-files/solr/solr-slf4jreporter.xml @@ -0,0 +1,35 @@ + + + + + + + + 1 + test + cores + + + + 1 + test + cores + foobar + + + diff --git a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java new file mode 100644 index 00000000000..b5b0f858395 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.metrics.reporters; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import info.ganglia.gmetric4j.gmetric.GMetric; +import info.ganglia.gmetric4j.gmetric.GMetricSlope; +import info.ganglia.gmetric4j.gmetric.GMetricType; +import org.apache.commons.io.FileUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.NodeConfig; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.core.SolrXmlConfig; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricReporter; +import org.apache.solr.util.TestHarness; +import org.junit.Test; + +import static org.mockito.Mockito.*; + +/** + * + */ +public class SolrGangliaReporterTest extends SolrTestCaseJ4 { + @Test + public void testReporter() throws Exception { + Path home = Paths.get(TEST_HOME()); + // define these properties, they are used in solrconfig.xml + System.setProperty("solr.test.sys.prop1", "propone"); + System.setProperty("solr.test.sys.prop2", "proptwo"); + + GMetric ganglia = mock(GMetric.class); + final List names = new ArrayList<>(); + doAnswer(invocation -> { + final Object[] args = invocation.getArguments(); + names.add((String)args[0]); + return null; + }).when(ganglia).announce(anyString(), anyString(), any(GMetricType.class), anyString(), any(GMetricSlope.class), anyInt(), anyInt(), anyString()); + String solrXml = FileUtils.readFileToString(Paths.get(home.toString(), "solr-gangliareporter.xml").toFile(), "UTF-8"); + NodeConfig cfg = SolrXmlConfig.fromString(new SolrResourceLoader(home), solrXml); + CoreContainer cc = createCoreContainer(cfg, + new TestHarness.TestCoresLocator(DEFAULT_TEST_CORENAME, initCoreDataDir.getAbsolutePath(), "solrconfig.xml", "schema.xml")); + h.coreName = DEFAULT_TEST_CORENAME; + SolrMetricManager metricManager = cc.getMetricManager(); + Map reporters = metricManager.getReporters("solr.node"); + assertEquals(1, reporters.size()); + SolrMetricReporter reporter = reporters.get("test"); + assertNotNull(reporter); + assertTrue(reporter instanceof SolrGangliaReporter); + SolrGangliaReporter gangliaReporter = (SolrGangliaReporter)reporter; + gangliaReporter.setGMetric(ganglia); + gangliaReporter.start(); + Thread.sleep(5000); + assertTrue(names.size() >= 3); + for (String name : names) { + assertTrue(name, name.startsWith("test.solr.node.cores.")); + } + } + +} diff --git a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java new file mode 100644 index 00000000000..6773e0ca0e3 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.metrics.reporters; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.NodeConfig; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.core.SolrXmlConfig; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricReporter; +import org.apache.solr.util.TestHarness; +import org.junit.Test; + +/** + * + */ +public class SolrGraphiteReporterTest extends SolrTestCaseJ4 { + + @Test + public void testReporter() throws Exception { + Path home = Paths.get(TEST_HOME()); + // define these properties, they are used in solrconfig.xml + System.setProperty("solr.test.sys.prop1", "propone"); + System.setProperty("solr.test.sys.prop2", "proptwo"); + + MockGraphite mock = new MockGraphite(); + try { + mock.start(); + Thread.sleep(1000); + // define the port where MockGraphite is running + System.setProperty("mock-graphite-port", String.valueOf(mock.port)); + String solrXml = FileUtils.readFileToString(Paths.get(home.toString(), "solr-graphitereporter.xml").toFile(), "UTF-8"); + NodeConfig cfg = SolrXmlConfig.fromString(new SolrResourceLoader(home), solrXml); + CoreContainer cc = createCoreContainer(cfg, + new TestHarness.TestCoresLocator(DEFAULT_TEST_CORENAME, initCoreDataDir.getAbsolutePath(), "solrconfig.xml", "schema.xml")); + h.coreName = DEFAULT_TEST_CORENAME; + SolrMetricManager metricManager = cc.getMetricManager(); + Map reporters = metricManager.getReporters("solr.node"); + assertEquals(1, reporters.size()); + SolrMetricReporter reporter = reporters.get("test"); + assertNotNull(reporter); + assertTrue(reporter instanceof SolrGraphiteReporter); + Thread.sleep(5000); + assertTrue(mock.lines.size() >= 3); + for (String line : mock.lines) { + assertTrue(line, line.startsWith("test.solr.node.cores.")); + } + } finally { + mock.close(); + } + } + + private class MockGraphite extends Thread { + private List lines = new ArrayList<>(); + private ServerSocket server = null; + private int port; + private boolean stop; + + MockGraphite() throws Exception { + server = new ServerSocket(0); + port = server.getLocalPort(); + stop = false; + } + + public void run() { + while (!stop) { + try { + Socket s = server.accept(); + BufferedReader br = new BufferedReader(new InputStreamReader(s.getInputStream(), "UTF-8")); + String line; + while ((line = br.readLine()) != null) { + lines.add(line); + } + } catch (Exception e) { + stop = true; + } + } + } + + public void close() throws Exception { + stop = true; + if (server != null) { + server.close(); + } + } + } + +} diff --git a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrSlf4jReporterTest.java b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrSlf4jReporterTest.java new file mode 100644 index 
00000000000..47bf8e7216f --- /dev/null +++ b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrSlf4jReporterTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.metrics.reporters; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.NodeConfig; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.core.SolrXmlConfig; +import org.apache.solr.logging.LogWatcher; +import org.apache.solr.logging.LogWatcherConfig; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricReporter; +import org.apache.solr.util.TestHarness; +import org.junit.Test; + +/** + * + */ +public class SolrSlf4jReporterTest extends SolrTestCaseJ4 { + + @Test + public void testReporter() throws Exception { + LogWatcherConfig watcherCfg = new LogWatcherConfig(true, null, null, 100); + LogWatcher watcher = LogWatcher.newRegisteredLogWatcher(watcherCfg, null); + watcher.setThreshold("INFO"); + Path home = Paths.get(TEST_HOME()); + // define these properties, they are used in solrconfig.xml + System.setProperty("solr.test.sys.prop1", "propone"); + System.setProperty("solr.test.sys.prop2", "proptwo"); + + String solrXml = FileUtils.readFileToString(Paths.get(home.toString(), "solr-slf4jreporter.xml").toFile(), "UTF-8"); + NodeConfig cfg = SolrXmlConfig.fromString(new SolrResourceLoader(home), solrXml); + CoreContainer cc = createCoreContainer(cfg, + new TestHarness.TestCoresLocator(DEFAULT_TEST_CORENAME, initCoreDataDir.getAbsolutePath(), "solrconfig.xml", "schema.xml")); + h.coreName = DEFAULT_TEST_CORENAME; + SolrMetricManager metricManager = cc.getMetricManager(); + Map reporters = metricManager.getReporters("solr.node"); + assertEquals(2, reporters.size()); + SolrMetricReporter reporter = reporters.get("test1"); + assertNotNull(reporter); + assertTrue(reporter instanceof SolrSlf4jReporter); + reporter = reporters.get("test2"); + assertNotNull(reporter); + assertTrue(reporter instanceof SolrSlf4jReporter); + + watcher.reset(); + Thread.sleep(5000); + + SolrDocumentList history = watcher.getHistory(-1, null); + // dot-separated names are treated like class names and collapsed + // in regular log output, but here we get the full name + assertTrue(history.stream().filter(d -> "solr.node".equals(d.getFirstValue("logger"))).count() > 0); + assertTrue(history.stream().filter(d -> "foobar".equals(d.getFirstValue("logger"))).count() > 0); + } +} diff --git a/solr/licenses/gmetric4j-1.0.7.jar.sha1 b/solr/licenses/gmetric4j-1.0.7.jar.sha1 new file 
mode 100644 index 00000000000..ca2b05fe514 --- /dev/null +++ b/solr/licenses/gmetric4j-1.0.7.jar.sha1 @@ -0,0 +1 @@ +37a1cb0d8821cad9bd33f1ce454459fed18efa44 diff --git a/solr/licenses/gmetric4j-LICENSE-BSD.txt b/solr/licenses/gmetric4j-LICENSE-BSD.txt new file mode 100644 index 00000000000..bdb2cf4ea6e --- /dev/null +++ b/solr/licenses/gmetric4j-LICENSE-BSD.txt @@ -0,0 +1,31 @@ +Title: gmetric4j + +Copyright: + +Copyright (C) 2012 Daniel Pocock +Copyright (c) 2008-2011 Jasper Humphrey + +Based on: jmxetric by Jasper Humphrey (BSD style license) + +License: BSD terms + + Copyright (C) 2010-2012 Daniel Pocock + Copyright (c) 2008-2011 Jasper Humphrey + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/solr/licenses/gmetric4j-NOTICE.txt b/solr/licenses/gmetric4j-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/metrics-ganglia-3.1.2.jar.sha1 b/solr/licenses/metrics-ganglia-3.1.2.jar.sha1 new file mode 100644 index 00000000000..337754bf00a --- /dev/null +++ b/solr/licenses/metrics-ganglia-3.1.2.jar.sha1 @@ -0,0 +1 @@ +2a4e2fcd6436f9b1771f0f9b6bab445dddcf704f diff --git a/solr/licenses/metrics-ganglia-LICENSE-ASL.txt b/solr/licenses/metrics-ganglia-LICENSE-ASL.txt new file mode 100644 index 00000000000..ccb320c7daa --- /dev/null +++ b/solr/licenses/metrics-ganglia-LICENSE-ASL.txt @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2010-2012 Coda Hale and Yammer, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/solr/licenses/metrics-ganglia-NOTICE.txt b/solr/licenses/metrics-ganglia-NOTICE.txt new file mode 100644 index 00000000000..b4c6298472f --- /dev/null +++ b/solr/licenses/metrics-ganglia-NOTICE.txt @@ -0,0 +1,12 @@ +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. + +This product includes software developed by Coda Hale and Yammer, Inc. 
+ +This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, +LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ + diff --git a/solr/licenses/metrics-graphite-3.1.2.jar.sha1 b/solr/licenses/metrics-graphite-3.1.2.jar.sha1 new file mode 100644 index 00000000000..34f01615d95 --- /dev/null +++ b/solr/licenses/metrics-graphite-3.1.2.jar.sha1 @@ -0,0 +1 @@ +15a68399652c6123fe6e4c82ac4f0749e2eb6583 diff --git a/solr/licenses/metrics-graphite-LICENSE-ASL.txt b/solr/licenses/metrics-graphite-LICENSE-ASL.txt new file mode 100644 index 00000000000..ccb320c7daa --- /dev/null +++ b/solr/licenses/metrics-graphite-LICENSE-ASL.txt @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2010-2012 Coda Hale and Yammer, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/solr/licenses/metrics-graphite-NOTICE.txt b/solr/licenses/metrics-graphite-NOTICE.txt new file mode 100644 index 00000000000..b4c6298472f --- /dev/null +++ b/solr/licenses/metrics-graphite-NOTICE.txt @@ -0,0 +1,12 @@ +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. + +This product includes software developed by Coda Hale and Yammer, Inc. + +This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, +LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ + diff --git a/solr/server/ivy.xml b/solr/server/ivy.xml index 8dc645a2917..c9b3a730143 100644 --- a/solr/server/ivy.xml +++ b/solr/server/ivy.xml @@ -36,6 +36,9 @@ + + + From 9bd152804da4c7704b7d3ddd311485b524ec0166 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Tue, 27 Dec 2016 15:34:33 -0500 Subject: [PATCH 34/83] SOLR-9897: re-indent UnifiedSolrHighlighter.java --- .../highlight/UnifiedSolrHighlighter.java | 478 +++++++++--------- 1 file changed, 239 insertions(+), 239 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java index 6f81241ff58..c38546ee116 100644 --- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java @@ -110,256 +110,256 @@ import org.apache.solr.util.plugin.PluginInfoInitialized; */ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized { - protected static final String SNIPPET_SEPARATOR = "\u0000"; - private static final String[] ZERO_LEN_STR_ARRAY = new String[0]; + protected static final String SNIPPET_SEPARATOR = "\u0000"; + private static final String[] ZERO_LEN_STR_ARRAY = new String[0]; - @Override - public void init(PluginInfo info) { + @Override + public void init(PluginInfo info) { + } + + @Override + public NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { + final SolrParams params = req.getParams(); + + // if highlighting isn't enabled, then why call doHighlighting? 
+ if (!isHighlightingEnabled(params)) + return null; + + int[] docIDs = toDocIDs(docs); + + // fetch the unique keys + String[] keys = getUniqueKeys(req.getSearcher(), docIDs); + + // query-time parameters + String[] fieldNames = getHighlightFields(query, req, defaultFields); + + int maxPassages[] = new int[fieldNames.length]; + for (int i = 0; i < fieldNames.length; i++) { + maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1); } - @Override - public NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { - final SolrParams params = req.getParams(); + UnifiedHighlighter highlighter = getHighlighter(req); + Map snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages); + return encodeSnippets(keys, fieldNames, snippets); + } - // if highlighting isn't enabled, then why call doHighlighting? - if (!isHighlightingEnabled(params)) - return null; + /** + * Creates an instance of the Lucene {@link UnifiedHighlighter}. Provided for subclass extension so that + * a subclass can return a subclass of {@link SolrExtendedUnifiedHighlighter}. + */ + protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) { + return new SolrExtendedUnifiedHighlighter(req); + } - int[] docIDs = toDocIDs(docs); - - // fetch the unique keys - String[] keys = getUniqueKeys(req.getSearcher(), docIDs); - - // query-time parameters - String[] fieldNames = getHighlightFields(query, req, defaultFields); - - int maxPassages[] = new int[fieldNames.length]; - for (int i = 0; i < fieldNames.length; i++) { - maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1); - } - - UnifiedHighlighter highlighter = getHighlighter(req); - Map snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages); - return encodeSnippets(keys, fieldNames, snippets); - } - - /** - * Creates an instance of the Lucene {@link UnifiedHighlighter}. Provided for subclass extension so that - * a subclass can return a subclass of {@link SolrExtendedUnifiedHighlighter}. - */ - protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) { - return new SolrExtendedUnifiedHighlighter(req); - } - - /** - * Encodes the resulting snippets into a namedlist - * - * @param keys the document unique keys - * @param fieldNames field names to highlight in the order - * @param snippets map from field name to snippet array for the docs - * @return encoded namedlist of summaries - */ - protected NamedList encodeSnippets(String[] keys, String[] fieldNames, Map snippets) { - NamedList list = new SimpleOrderedMap<>(); - for (int i = 0; i < keys.length; i++) { - NamedList summary = new SimpleOrderedMap<>(); - for (String field : fieldNames) { - String snippet = snippets.get(field)[i]; - if (snippet == null) { - //TODO reuse logic of DefaultSolrHighlighter.alternateField - summary.add(field, ZERO_LEN_STR_ARRAY); - } else { - // we used a special snippet separator char and we can now split on it. 
- summary.add(field, snippet.split(SNIPPET_SEPARATOR)); - } - } - list.add(keys[i], summary); - } - return list; - } - - /** - * Converts solr's DocList to the int[] docIDs - */ - protected int[] toDocIDs(DocList docs) { - int[] docIDs = new int[docs.size()]; - DocIterator iterator = docs.iterator(); - for (int i = 0; i < docIDs.length; i++) { - if (!iterator.hasNext()) { - throw new AssertionError(); - } - docIDs[i] = iterator.nextDoc(); - } - if (iterator.hasNext()) { - throw new AssertionError(); - } - return docIDs; - } - - /** - * Retrieves the unique keys for the topdocs to key the results - */ - protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException { - IndexSchema schema = searcher.getSchema(); - SchemaField keyField = schema.getUniqueKeyField(); - if (keyField != null) { - Set selector = Collections.singleton(keyField.getName()); - String[] uniqueKeys = new String[docIDs.length]; - for (int i = 0; i < docIDs.length; i++) { - int docid = docIDs[i]; - Document doc = searcher.doc(docid, selector); - String id = schema.printableUniqueKey(doc); - uniqueKeys[i] = id; - } - return uniqueKeys; + /** + * Encodes the resulting snippets into a namedlist + * + * @param keys the document unique keys + * @param fieldNames field names to highlight in the order + * @param snippets map from field name to snippet array for the docs + * @return encoded namedlist of summaries + */ + protected NamedList encodeSnippets(String[] keys, String[] fieldNames, Map snippets) { + NamedList list = new SimpleOrderedMap<>(); + for (int i = 0; i < keys.length; i++) { + NamedList summary = new SimpleOrderedMap<>(); + for (String field : fieldNames) { + String snippet = snippets.get(field)[i]; + if (snippet == null) { + //TODO reuse logic of DefaultSolrHighlighter.alternateField + summary.add(field, ZERO_LEN_STR_ARRAY); } else { - return new String[docIDs.length]; + // we used a special snippet separator char and we can now split on it. + summary.add(field, snippet.split(SNIPPET_SEPARATOR)); } + } + list.add(keys[i], summary); + } + return list; + } + + /** + * Converts solr's DocList to the int[] docIDs + */ + protected int[] toDocIDs(DocList docs) { + int[] docIDs = new int[docs.size()]; + DocIterator iterator = docs.iterator(); + for (int i = 0; i < docIDs.length; i++) { + if (!iterator.hasNext()) { + throw new AssertionError(); + } + docIDs[i] = iterator.nextDoc(); + } + if (iterator.hasNext()) { + throw new AssertionError(); + } + return docIDs; + } + + /** + * Retrieves the unique keys for the topdocs to key the results + */ + protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException { + IndexSchema schema = searcher.getSchema(); + SchemaField keyField = schema.getUniqueKeyField(); + if (keyField != null) { + Set selector = Collections.singleton(keyField.getName()); + String[] uniqueKeys = new String[docIDs.length]; + for (int i = 0; i < docIDs.length; i++) { + int docid = docIDs[i]; + Document doc = searcher.doc(docid, selector); + String id = schema.printableUniqueKey(doc); + uniqueKeys[i] = id; + } + return uniqueKeys; + } else { + return new String[docIDs.length]; + } + } + + /** + * From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}. 
+ */ + protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter { + protected final SolrParams params; + protected final IndexSchema schema; + + protected final RTimerTree loadFieldValuesTimer; + + public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) { + super(req.getSearcher(), req.getSchema().getIndexAnalyzer()); + this.params = req.getParams(); + this.schema = req.getSchema(); + this.setMaxLength( + params.getInt(HighlightParams.MAX_CHARS, UnifiedHighlighter.DEFAULT_MAX_LENGTH)); + this.setCacheFieldValCharsThreshold( + params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD)); + + // SolrRequestInfo is a thread-local singleton providing access to the ResponseBuilder to code that + // otherwise can't get it in a nicer way. + SolrQueryRequest request = SolrRequestInfo.getRequestInfo().getReq(); + final RTimerTree timerTree; + if (request.getRequestTimer() != null) { //It may be null if not used in a search context. + timerTree = request.getRequestTimer(); + } else { + timerTree = new RTimerTree(); // since null checks are annoying + } + loadFieldValuesTimer = timerTree.sub("loadFieldValues"); // we assume a new timer, state of STARTED + loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED. + } + + @Override + protected OffsetSource getOffsetSource(String field) { + String sourceStr = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE); + if (sourceStr != null) { + return OffsetSource.valueOf(sourceStr.toUpperCase(Locale.ROOT)); + } else { + return super.getOffsetSource(field); + } + } + + @Override + public int getMaxNoHighlightPassages(String field) { + boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false); + if (defaultSummary) { + return -1;// signifies return first hl.snippets passages worth of the content + } else { + return 0;// will return null + } + } + + @Override + protected PassageFormatter getFormatter(String fieldName) { + String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE, + params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "") + ); + + String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, + params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "") + ); + String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR); + String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple"); + return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder)); + } + + @Override + protected PassageScorer getScorer(String fieldName) { + float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f); + float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f); + float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f); + return new PassageScorer(k1, b, pivot); + } + + @Override + protected BreakIterator getBreakIterator(String field) { + String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE); + String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY); + String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT); + Locale locale = parseLocale(language, country, variant); + String type = params.getFieldParam(field, HighlightParams.BS_TYPE); + return parseBreakIterator(type, locale); } /** - * From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}. 
+ * parse a break iterator type for the specified locale */ - protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter { - protected final SolrParams params; - protected final IndexSchema schema; - - protected final RTimerTree loadFieldValuesTimer; - - public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) { - super(req.getSearcher(), req.getSchema().getIndexAnalyzer()); - this.params = req.getParams(); - this.schema = req.getSchema(); - this.setMaxLength( - params.getInt(HighlightParams.MAX_CHARS, UnifiedHighlighter.DEFAULT_MAX_LENGTH)); - this.setCacheFieldValCharsThreshold( - params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD)); - - // SolrRequestInfo is a thread-local singleton providing access to the ResponseBuilder to code that - // otherwise can't get it in a nicer way. - SolrQueryRequest request = SolrRequestInfo.getRequestInfo().getReq(); - final RTimerTree timerTree; - if (request.getRequestTimer() != null) { //It may be null if not used in a search context. - timerTree = request.getRequestTimer(); - } else { - timerTree = new RTimerTree(); // since null checks are annoying - } - loadFieldValuesTimer = timerTree.sub("loadFieldValues"); // we assume a new timer, state of STARTED - loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED. - } - - @Override - protected OffsetSource getOffsetSource(String field) { - String sourceStr = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE); - if (sourceStr != null) { - return OffsetSource.valueOf(sourceStr.toUpperCase(Locale.ROOT)); - } else { - return super.getOffsetSource(field); - } - } - - @Override - public int getMaxNoHighlightPassages(String field) { - boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false); - if (defaultSummary) { - return -1;// signifies return first hl.snippets passages worth of the content - } else { - return 0;// will return null - } - } - - @Override - protected PassageFormatter getFormatter(String fieldName) { - String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE, - params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "") - ); - - String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, - params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "") - ); - String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR); - String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple"); - return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder)); - } - - @Override - protected PassageScorer getScorer(String fieldName) { - float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f); - float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f); - float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f); - return new PassageScorer(k1, b, pivot); - } - - @Override - protected BreakIterator getBreakIterator(String field) { - String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE); - String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY); - String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT); - Locale locale = parseLocale(language, country, variant); - String type = params.getFieldParam(field, HighlightParams.BS_TYPE); - return parseBreakIterator(type, locale); - } - - /** - * parse a break iterator type for the 
specified locale - */ - protected BreakIterator parseBreakIterator(String type, Locale locale) { - if (type == null || "SENTENCE".equals(type)) { - return BreakIterator.getSentenceInstance(locale); - } else if ("LINE".equals(type)) { - return BreakIterator.getLineInstance(locale); - } else if ("WORD".equals(type)) { - return BreakIterator.getWordInstance(locale); - } else if ("CHARACTER".equals(type)) { - return BreakIterator.getCharacterInstance(locale); - } else if ("WHOLE".equals(type)) { - return new WholeBreakIterator(); - } else { - throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type); - } - } - - /** - * parse a locale from a language+country+variant spec - */ - protected Locale parseLocale(String language, String country, String variant) { - if (language == null && country == null && variant == null) { - return Locale.ROOT; - } else if (language == null) { - throw new IllegalArgumentException("language is required if country or variant is specified"); - } else if (country == null && variant != null) { - throw new IllegalArgumentException("To specify variant, country is required"); - } else if (country != null && variant != null) { - return new Locale(language, country, variant); - } else if (country != null) { - return new Locale(language, country); - } else { - return new Locale(language); - } - } - - @Override - protected List loadFieldValues(String[] fields, DocIdSetIterator docIter, int - cacheCharsThreshold) throws IOException { - // Time loading field values. It can be an expensive part of highlighting. - loadFieldValuesTimer.resume(); - try { - return super.loadFieldValues(fields, docIter, cacheCharsThreshold); - } finally { - loadFieldValuesTimer.pause(); // note: doesn't need to be "stopped"; pause is fine. 
- } - } - - @Override - protected boolean shouldHandleMultiTermQuery(String field) { - return params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true); - } - - @Override - protected boolean shouldHighlightPhrasesStrictly(String field) { - return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true); - } - + protected BreakIterator parseBreakIterator(String type, Locale locale) { + if (type == null || "SENTENCE".equals(type)) { + return BreakIterator.getSentenceInstance(locale); + } else if ("LINE".equals(type)) { + return BreakIterator.getLineInstance(locale); + } else if ("WORD".equals(type)) { + return BreakIterator.getWordInstance(locale); + } else if ("CHARACTER".equals(type)) { + return BreakIterator.getCharacterInstance(locale); + } else if ("WHOLE".equals(type)) { + return new WholeBreakIterator(); + } else { + throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type); + } } + /** + * parse a locale from a language+country+variant spec + */ + protected Locale parseLocale(String language, String country, String variant) { + if (language == null && country == null && variant == null) { + return Locale.ROOT; + } else if (language == null) { + throw new IllegalArgumentException("language is required if country or variant is specified"); + } else if (country == null && variant != null) { + throw new IllegalArgumentException("To specify variant, country is required"); + } else if (country != null && variant != null) { + return new Locale(language, country, variant); + } else if (country != null) { + return new Locale(language, country); + } else { + return new Locale(language); + } + } + + @Override + protected List loadFieldValues(String[] fields, DocIdSetIterator docIter, int + cacheCharsThreshold) throws IOException { + // Time loading field values. It can be an expensive part of highlighting. + loadFieldValuesTimer.resume(); + try { + return super.loadFieldValues(fields, docIter, cacheCharsThreshold); + } finally { + loadFieldValuesTimer.pause(); // note: doesn't need to be "stopped"; pause is fine. + } + } + + @Override + protected boolean shouldHandleMultiTermQuery(String field) { + return params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true); + } + + @Override + protected boolean shouldHighlightPhrasesStrictly(String field) { + return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true); + } + + } + } \ No newline at end of file From 283b329bbbd8b6a8f9c75ec4e79ca4034c910e88 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Tue, 27 Dec 2016 19:41:55 -0500 Subject: [PATCH 35/83] LUCENE-7511: Introduce Vector.MINIMUM_ANGULAR_RESOLUTION. 
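For context, the diffs that follow replace comparisons of angle differences against the
linear epsilon (Vector.MINIMUM_RESOLUTION) with a dedicated angular epsilon. A minimal
sketch of the idiom, assuming only the two constants from Vector.java; the helper name
below is illustrative and not part of the patch:

    // Constants as defined in Vector.java after this change.
    public static final double MINIMUM_RESOLUTION = 1.0e-12;
    public static final double MINIMUM_ANGULAR_RESOLUTION = Math.PI * MINIMUM_RESOLUTION;

    // Hypothetical helper: two angles (in radians) are treated as coincident when their
    // difference falls below the angular epsilon, e.g. to detect a degenerate bounding
    // box whose left and right longitudes are effectively the same.
    static boolean anglesEffectivelyEqual(double a, double b) {
      return Math.abs(a - b) < MINIMUM_ANGULAR_RESOLUTION;
    }
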
--- .../lucene/spatial3d/geom/GeoBBoxFactory.java | 34 +++++++++---------- .../spatial3d/geom/GeoCircleFactory.java | 2 +- .../spatial3d/geom/GeoPolygonFactory.java | 6 ++-- .../apache/lucene/spatial3d/geom/Vector.java | 4 +++ 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBBoxFactory.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBBoxFactory.java index 3d0b8cb7a40..d0427bc91df 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBBoxFactory.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBBoxFactory.java @@ -45,18 +45,18 @@ public class GeoBBoxFactory { leftLon = -Math.PI; if (rightLon > Math.PI) rightLon = Math.PI; - if ((Math.abs(leftLon + Math.PI) < Vector.MINIMUM_RESOLUTION && Math.abs(rightLon - Math.PI) < Vector.MINIMUM_RESOLUTION) || - (Math.abs(rightLon + Math.PI) < Vector.MINIMUM_RESOLUTION && Math.abs(leftLon - Math.PI) < Vector.MINIMUM_RESOLUTION)) { - if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION && Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) + if ((Math.abs(leftLon + Math.PI) < Vector.MINIMUM_ANGULAR_RESOLUTION && Math.abs(rightLon - Math.PI) < Vector.MINIMUM_ANGULAR_RESOLUTION) || + (Math.abs(rightLon + Math.PI) < Vector.MINIMUM_ANGULAR_RESOLUTION && Math.abs(leftLon - Math.PI) < Vector.MINIMUM_ANGULAR_RESOLUTION)) { + if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION && Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) return new GeoWorld(planetModel); - if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_RESOLUTION) { - if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION || Math.abs(topLat + Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) + if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_ANGULAR_RESOLUTION) { + if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION || Math.abs(topLat + Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) return new GeoDegeneratePoint(planetModel, topLat, 0.0); return new GeoDegenerateLatitudeZone(planetModel, topLat); } - if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) + if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) return new GeoNorthLatitudeZone(planetModel, bottomLat); - else if (Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) + else if (Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) return new GeoSouthLatitudeZone(planetModel, topLat); return new GeoLatitudeZone(planetModel, topLat, bottomLat); } @@ -65,7 +65,7 @@ public class GeoBBoxFactory { if (extent < 0.0) extent += Math.PI * 2.0; if (topLat == Math.PI * 0.5 && bottomLat == -Math.PI * 0.5) { - if (Math.abs(leftLon - rightLon) < Vector.MINIMUM_RESOLUTION) + if (Math.abs(leftLon - rightLon) < Vector.MINIMUM_ANGULAR_RESOLUTION) return new GeoDegenerateLongitudeSlice(planetModel, leftLon); if (extent >= Math.PI) @@ -74,35 +74,35 @@ public class GeoBBoxFactory { return new GeoLongitudeSlice(planetModel, leftLon, rightLon); } //System.err.println(" not longitude slice"); - if (Math.abs(leftLon - rightLon) < Vector.MINIMUM_RESOLUTION) { - if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_RESOLUTION) + if (Math.abs(leftLon - rightLon) < Vector.MINIMUM_ANGULAR_RESOLUTION) { + if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_ANGULAR_RESOLUTION) return new GeoDegeneratePoint(planetModel, topLat, leftLon); return new 
GeoDegenerateVerticalLine(planetModel, topLat, bottomLat, leftLon); } //System.err.println(" not vertical line"); if (extent >= Math.PI) { - if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_ANGULAR_RESOLUTION) { //System.err.println(" wide degenerate line"); return new GeoWideDegenerateHorizontalLine(planetModel, topLat, leftLon, rightLon); } - if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return new GeoWideNorthRectangle(planetModel, bottomLat, leftLon, rightLon); - } else if (Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) { + } else if (Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return new GeoWideSouthRectangle(planetModel, topLat, leftLon, rightLon); } //System.err.println(" wide rect"); return new GeoWideRectangle(planetModel, topLat, bottomLat, leftLon, rightLon); } - if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_RESOLUTION) { - if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION || Math.abs(topLat + Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(topLat - bottomLat) < Vector.MINIMUM_ANGULAR_RESOLUTION) { + if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION || Math.abs(topLat + Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return new GeoDegeneratePoint(planetModel, topLat, 0.0); } //System.err.println(" horizontal line"); return new GeoDegenerateHorizontalLine(planetModel, topLat, leftLon, rightLon); } - if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(topLat - Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return new GeoNorthRectangle(planetModel, bottomLat, leftLon, rightLon); - } else if (Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_RESOLUTION) { + } else if (Math.abs(bottomLat + Math.PI * 0.5) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return new GeoSouthRectangle(planetModel, topLat, leftLon, rightLon); } //System.err.println(" rectangle"); diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoCircleFactory.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoCircleFactory.java index ee75179eb99..292790f4a0a 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoCircleFactory.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoCircleFactory.java @@ -34,7 +34,7 @@ public class GeoCircleFactory { * @return a GeoCircle corresponding to what was specified. 
*/ public static GeoCircle makeGeoCircle(final PlanetModel planetModel, final double latitude, final double longitude, final double radius) { - if (radius < Vector.MINIMUM_RESOLUTION) { + if (radius < Vector.MINIMUM_ANGULAR_RESOLUTION) { return new GeoDegeneratePoint(planetModel, latitude, longitude); } return new GeoStandardCircle(planetModel, latitude, longitude, radius); diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java index f0e4bcd84af..97bc230bb5b 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java @@ -601,7 +601,7 @@ public class GeoPolygonFactory { if (angleDelta > Math.PI) { angleDelta -= Math.PI * 2.0; } - if (Math.abs(angleDelta - Math.PI) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(angleDelta - Math.PI) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return null; } //System.out.println(" angle delta = "+angleDelta); @@ -624,7 +624,7 @@ public class GeoPolygonFactory { if (angleDelta > Math.PI) { angleDelta -= Math.PI * 2.0; } - if (Math.abs(angleDelta - Math.PI) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(angleDelta - Math.PI) < Vector.MINIMUM_ANGULAR_RESOLUTION) { return null; } //System.out.println(" angle delta = "+angleDelta); @@ -634,7 +634,7 @@ public class GeoPolygonFactory { // Clockwise == inside == negative //System.out.println("Arcdistance = "+arcDistance); - if (Math.abs(arcDistance) < Vector.MINIMUM_RESOLUTION) { + if (Math.abs(arcDistance) < Vector.MINIMUM_ANGULAR_RESOLUTION) { // No idea what direction, so try another pole. return null; } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Vector.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Vector.java index 7ebf4535789..852cac49df1 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Vector.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Vector.java @@ -28,6 +28,10 @@ public class Vector { * less than this. */ public static final double MINIMUM_RESOLUTION = 1.0e-12; + /** + * Angular version of minimum resolution. + */ + public static final double MINIMUM_ANGULAR_RESOLUTION = Math.PI * MINIMUM_RESOLUTION; /** * For squared quantities, the bound is squared too. 
*/ From c2292faaf1f4993bf1cec666f4286ac71f786506 Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Wed, 28 Dec 2016 19:00:13 +0530 Subject: [PATCH 36/83] SOLR-9877: Remove assertion because many tests use UpdateShardHandler without metrics --- .../apache/solr/util/stats/InstrumentedHttpRequestExecutor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java index 946a822d15a..ad76d734361 100644 --- a/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java +++ b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java @@ -53,7 +53,6 @@ public class InstrumentedHttpRequestExecutor extends HttpRequestExecutor impleme @Override public HttpResponse execute(HttpRequest request, HttpClientConnection conn, HttpContext context) throws IOException, HttpException { - assert metricsRegistry != null; final Timer.Context timerContext = timer(request).time(); try { return super.execute(request, conn, context); From e4ef4239f1b23afb116868e8528f1cd947287bd9 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 28 Dec 2016 10:41:17 +0000 Subject: [PATCH 37/83] SOLR-9787, SOLR-9442: Replace json.nl=arrnvp with json.nl=arrntv (array of Name Type Value) style in JSONResponseWriter --- solr/CHANGES.txt | 4 +- .../solr/response/JSONResponseWriter.java | 86 ++++++++++--------- .../apache/solr/response/JSONWriterTest.java | 20 +++-- 3 files changed, 60 insertions(+), 50 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index f587109a48a..7a708a6b95c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -130,8 +130,8 @@ New Features Example: { type:terms, field:category, filter:"user:yonik" } (yonik) -* SOLR-9442: Adds Array of NamedValuePair (json.nl=arrnvp) style to JSONResponseWriter. - (Jonny Marks, Christine Poerschke) +* SOLR-9442, SOLR-9787: Adds Array of Name Type Value (json.nl=arrntv) style to JSONResponseWriter. + (Jonny Marks, Christine Poerschke, hossman) * SOLR-9481: Authentication and Authorization plugins now work in standalone mode if security.json is placed in SOLR_HOME on every node. Editing config through API is supported but affects only that one node. 
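For context before the writer changes below: with json.nl=arrntv each NamedList entry is
rendered as an object with explicit "name", "type" and "value" keys, so, per the new javadoc
and tests, NamedList("a"=1,"bar"="foo",null=3.4f) becomes
[{"name":"a","type":"int","value":1},{"name":"bar","type":"str","value":"foo"},{"name":null,"type":"float","value":3.4}].
A minimal client-side sketch, assuming SolrJ's ModifiableSolrParams and the standard json.nl
request parameter; the snippet is illustrative and not part of the patch:

    import org.apache.solr.common.params.ModifiableSolrParams;

    // Sketch only: ask the JSON response writer for the new NamedList style.
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("wt", "json");        // select JSONResponseWriter
    params.set("json.nl", "arrntv"); // NamedList entries -> {"name":...,"type":...,"value":...}
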
diff --git a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java index ae1ea4703ad..513df4eed53 100644 --- a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java @@ -59,9 +59,9 @@ public class JSONResponseWriter implements QueryResponseWriter { final String namedListStyle = params.get(JSONWriter.JSON_NL_STYLE, JSONWriter.JSON_NL_FLAT).intern(); final JSONWriter w; - if (namedListStyle.equals(JSONWriter.JSON_NL_ARROFNVP)) { - w = new ArrayOfNamedValuePairJSONWriter( - writer, req, rsp, wrapperFunction, namedListStyle); + if (namedListStyle.equals(JSONWriter.JSON_NL_ARROFNTV)) { + w = new ArrayOfNameTypeValueJSONWriter( + writer, req, rsp, wrapperFunction, namedListStyle, true); } else { w = new JSONWriter( writer, req, rsp, wrapperFunction, namedListStyle); @@ -96,7 +96,7 @@ class JSONWriter extends TextResponseWriter { static final String JSON_NL_FLAT="flat"; static final String JSON_NL_ARROFARR="arrarr"; static final String JSON_NL_ARROFMAP="arrmap"; - static final String JSON_NL_ARROFNVP="arrnvp"; + static final String JSON_NL_ARROFNTV="arrntv"; static final String JSON_WRAPPER_FUNCTION="json.wrf"; @@ -331,9 +331,9 @@ class JSONWriter extends TextResponseWriter { writeNamedListAsArrArr(name,val); } else if (namedListStyle==JSON_NL_ARROFMAP) { writeNamedListAsArrMap(name,val); - } else if (namedListStyle==JSON_NL_ARROFNVP) { + } else if (namedListStyle==JSON_NL_ARROFNTV) { throw new UnsupportedOperationException(namedListStyle - + " namedListStyle must only be used with "+ArrayOfNamedValuePairJSONWriter.class.getSimpleName()); + + " namedListStyle must only be used with "+ArrayOfNameTypeValueJSONWriter.class.getSimpleName()); } } @@ -675,20 +675,25 @@ class JSONWriter extends TextResponseWriter { } /** - * Writes NamedLists directly as an array of NamedValuePair JSON objects... - * NamedList("a"=1,"b"=2,null=3,null=null) => [{"name":"a","int":1},{"name":"b","int":2},{"int":3},{"null":null}] - * NamedList("a"=1,"bar"="foo",null=3.4f) => [{"name":"a","int":1},{"name":"bar","str":"foo"},{"float":3.4}] + * Writes NamedLists directly as an array of NameTypeValue JSON objects... 
+ * NamedList("a"=1,"b"=null,null=3,null=null) => + * [{"name":"a","type":"int","value":1}, + * {"name":"b","type":"null","value":null}, + * {"name":null,"type":"int","value":3}, + * {"name":null,"type":"null","value":null}] + * NamedList("a"=1,"bar"="foo",null=3.4f) => + * [{"name":"a","type":"int","value":1}, + * {"name":"bar","type":"str","value":"foo"}, + * {"name":null,"type":"float","value":3.4}] */ -class ArrayOfNamedValuePairJSONWriter extends JSONWriter { - private boolean writeTypeAsKey = false; +class ArrayOfNameTypeValueJSONWriter extends JSONWriter { + protected boolean writeTypeAndValueKey = false; + private final boolean writeNullName; - public ArrayOfNamedValuePairJSONWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, - String wrapperFunction, String namedListStyle) { + public ArrayOfNameTypeValueJSONWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, + String wrapperFunction, String namedListStyle, boolean writeNullName) { super(writer, req, rsp, wrapperFunction, namedListStyle); - if (namedListStyle != JSON_NL_ARROFNVP) { - throw new UnsupportedOperationException(ArrayOfNamedValuePairJSONWriter.class.getSimpleName()+" must only be used with " - + JSON_NL_ARROFNVP + " style"); - } + this.writeNullName = writeNullName; } @Override @@ -720,24 +725,24 @@ class ArrayOfNamedValuePairJSONWriter extends JSONWriter { /* * JSONWriter's writeNamedListAsArrMap turns NamedList("bar"="foo") into [{"foo":"bar"}] - * but we here wish to turn it into [ {"name":"bar","str":"foo"} ] instead. + * but we here wish to turn it into [ {"name":"bar","type":"str","value":"foo"} ] instead. * * So first we write the {"name":"bar", portion ... */ writeMapOpener(-1); - if (elementName != null) { + if (elementName != null || writeNullName) { writeKey("name", false); writeVal("name", elementName); writeMapSeparator(); } /* - * ... and then we write the "str":"foo"} portion. + * ... and then we write the "type":"str","value":"foo"} portion. 
*/ - writeTypeAsKey = true; + writeTypeAndValueKey = true; writeVal(null, elementVal); // passing null since writeVal doesn't actually use name (and we already wrote elementName above) - if (writeTypeAsKey) { - throw new RuntimeException("writeTypeAsKey should have been reset to false by writeVal('"+elementName+"','"+elementVal+"')"); + if (writeTypeAndValueKey) { + throw new RuntimeException("writeTypeAndValueKey should have been reset to false by writeVal('"+elementName+"','"+elementVal+"')"); } writeMapCloser(); } @@ -746,82 +751,85 @@ class ArrayOfNamedValuePairJSONWriter extends JSONWriter { writeArrayCloser(); } - private void ifNeededWriteTypeAsKey(String type) throws IOException { - if (writeTypeAsKey) { - writeTypeAsKey = false; - writeKey(type, false); + protected void ifNeededWriteTypeAndValueKey(String type) throws IOException { + if (writeTypeAndValueKey) { + writeTypeAndValueKey = false; + writeKey("type", false); + writeVal("type", type); + writeMapSeparator(); + writeKey("value", false); } } @Override public void writeInt(String name, String val) throws IOException { - ifNeededWriteTypeAsKey("int"); + ifNeededWriteTypeAndValueKey("int"); super.writeInt(name, val); } @Override public void writeLong(String name, String val) throws IOException { - ifNeededWriteTypeAsKey("long"); + ifNeededWriteTypeAndValueKey("long"); super.writeLong(name, val); } @Override public void writeFloat(String name, String val) throws IOException { - ifNeededWriteTypeAsKey("float"); + ifNeededWriteTypeAndValueKey("float"); super.writeFloat(name, val); } @Override public void writeDouble(String name, String val) throws IOException { - ifNeededWriteTypeAsKey("double"); + ifNeededWriteTypeAndValueKey("double"); super.writeDouble(name, val); } @Override public void writeBool(String name, String val) throws IOException { - ifNeededWriteTypeAsKey("bool"); + ifNeededWriteTypeAndValueKey("bool"); super.writeBool(name, val); } @Override public void writeDate(String name, String val) throws IOException { - ifNeededWriteTypeAsKey("date"); + ifNeededWriteTypeAndValueKey("date"); super.writeDate(name, val); } @Override public void writeStr(String name, String val, boolean needsEscaping) throws IOException { - ifNeededWriteTypeAsKey("str"); + ifNeededWriteTypeAndValueKey("str"); super.writeStr(name, val, needsEscaping); } @Override public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx) throws IOException { - ifNeededWriteTypeAsKey("doc"); + ifNeededWriteTypeAndValueKey("doc"); super.writeSolrDocument(name, doc, returnFields, idx); } @Override public void writeStartDocumentList(String name, long start, int size, long numFound, Float maxScore) throws IOException { - ifNeededWriteTypeAsKey("doclist"); + ifNeededWriteTypeAndValueKey("doclist"); super.writeStartDocumentList(name, start, size, numFound, maxScore); } @Override public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException { - ifNeededWriteTypeAsKey("map"); + ifNeededWriteTypeAndValueKey("map"); super.writeMap(name, val, excludeOuter, isFirstVal); } @Override public void writeArray(String name, Iterator val) throws IOException { - ifNeededWriteTypeAsKey("array"); + ifNeededWriteTypeAndValueKey("array"); super.writeArray(name, val); } @Override public void writeNull(String name) throws IOException { - ifNeededWriteTypeAsKey("null"); + ifNeededWriteTypeAndValueKey("null"); super.writeNull(name); } } diff --git a/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java 
b/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java index a056016d5b8..45ca7087792 100644 --- a/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java +++ b/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java @@ -81,7 +81,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 { JSONWriter.JSON_NL_MAP, JSONWriter.JSON_NL_ARROFARR, JSONWriter.JSON_NL_ARROFMAP, - JSONWriter.JSON_NL_ARROFNVP, + JSONWriter.JSON_NL_ARROFNTV, }; for (final String namedListStyle : namedListStyles) { implTestJSON(namedListStyle); @@ -116,8 +116,10 @@ public class JSONWriterTest extends SolrTestCaseJ4 { expectedNLjson = "\"nl\":[[\"data1\",\"he\\u2028llo\\u2029!\"],[null,42],[null,null]]"; } else if (namedListStyle == JSONWriter.JSON_NL_ARROFMAP) { expectedNLjson = "\"nl\":[{\"data1\":\"he\\u2028llo\\u2029!\"},42,null]"; - } else if (namedListStyle == JSONWriter.JSON_NL_ARROFNVP) { - expectedNLjson = "\"nl\":[{\"name\":\"data1\",\"str\":\"he\\u2028llo\\u2029!\"},{\"int\":42},{\"null\":null}]"; + } else if (namedListStyle == JSONWriter.JSON_NL_ARROFNTV) { + expectedNLjson = "\"nl\":[{\"name\":\"data1\",\"type\":\"str\",\"value\":\"he\\u2028llo\\u2029!\"}," + + "{\"name\":null,\"type\":\"int\",\"value\":42}," + + "{\"name\":null,\"type\":\"null\",\"value\":null}]"; } else { expectedNLjson = null; fail("unexpected namedListStyle="+namedListStyle); @@ -168,7 +170,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 { } @Test - public void testArrnvpWriterOverridesAllWrites() { + public void testArrntvWriterOverridesAllWrites() { // List rather than Set because two not-overridden methods could share name but not signature final List methodsExpectedNotOverriden = new ArrayList<>(14); methodsExpectedNotOverriden.add("writeResponse"); @@ -189,7 +191,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 { methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeMap(org.apache.solr.common.MapWriter) throws java.io.IOException"); methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeIterator(org.apache.solr.common.IteratorWriter) throws java.io.IOException"); - final Class subClass = ArrayOfNamedValuePairJSONWriter.class; + final Class subClass = ArrayOfNameTypeValueJSONWriter.class; final Class superClass = subClass.getSuperclass(); for (final Method superClassMethod : superClass.getDeclaredMethods()) { @@ -231,14 +233,14 @@ public class JSONWriterTest extends SolrTestCaseJ4 { } @Test - public void testArrnvpWriterLacksMethodsOfItsOwn() { - final Class subClass = ArrayOfNamedValuePairJSONWriter.class; + public void testArrntvWriterLacksMethodsOfItsOwn() { + final Class subClass = ArrayOfNameTypeValueJSONWriter.class; final Class superClass = subClass.getSuperclass(); // ArrayOfNamedValuePairJSONWriter is a simple sub-class // which should have (almost) no methods of its own for (final Method subClassMethod : subClass.getDeclaredMethods()) { // only own private method of its own - if (subClassMethod.getName().equals("ifNeededWriteTypeAsKey")) continue; + if (subClassMethod.getName().equals("ifNeededWriteTypeAndValueKey")) continue; try { final Method superClassMethod = superClass.getDeclaredMethod( subClassMethod.getName(), @@ -260,7 +262,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 { assertEquals("flat", JSONWriter.JSON_NL_FLAT); assertEquals("arrarr", JSONWriter.JSON_NL_ARROFARR); assertEquals("arrmap", JSONWriter.JSON_NL_ARROFMAP); - assertEquals("arrnvp", JSONWriter.JSON_NL_ARROFNVP); + assertEquals("arrntv", 
JSONWriter.JSON_NL_ARROFNTV); assertEquals("json.wrf", JSONWriter.JSON_WRAPPER_FUNCTION); } From dc6dcdda8078eb9f100fd6c66b5d488d057b019b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 28 Dec 2016 20:12:02 +0100 Subject: [PATCH 38/83] LUCENE-7605: Use codec-specific impl of live docs when sorting. --- .../lucene/index/DocumentsWriterPerThread.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 49d03adc707..48901e50229 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -33,10 +33,10 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.TrackingDirectoryWrapper; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.IntBlockPool; import org.apache.lucene.util.MutableBits; @@ -496,15 +496,15 @@ class DocumentsWriterPerThread { return filesToDelete; } - private MutableBits sortLiveDocs(MutableBits bits, Sorter.DocMap sortMap) { - assert bits != null && sortMap != null; - FixedBitSet bitSet = new FixedBitSet(bits.length()); - for (int i = 0; i < bits.length(); i++) { - if (bits.get(i)) { - bitSet.set(sortMap.oldToNew(i)); + private MutableBits sortLiveDocs(Bits liveDocs, Sorter.DocMap sortMap) throws IOException { + assert liveDocs != null && sortMap != null; + MutableBits sortedLiveDocs = codec.liveDocsFormat().newLiveDocs(liveDocs.length()); + for (int i = 0; i < liveDocs.length(); i++) { + if (liveDocs.get(i) == false) { + sortedLiveDocs.clear(sortMap.oldToNew(i)); } } - return bitSet; + return sortedLiveDocs; } /** From 96ed221fb6924dd167591004a5eaf70d53f92e4f Mon Sep 17 00:00:00 2001 From: markrmiller Date: Wed, 28 Dec 2016 17:40:03 -0500 Subject: [PATCH 39/83] SOLR-9859: replication.properties cannot be updated after being written and neither eplication.properties or ndex.properties are durable in the face of a crash. --- solr/CHANGES.txt | 3 +++ .../apache/solr/core/DirectoryFactory.java | 15 +++++++++++ .../solr/core/HdfsDirectoryFactory.java | 9 +++++++ .../solr/core/StandardDirectoryFactory.java | 22 ++++++++++++++++ .../org/apache/solr/handler/IndexFetcher.java | 25 +++++++++++-------- .../solr/handler/TestReplicationHandler.java | 7 +++++- 6 files changed, 69 insertions(+), 12 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 7a708a6b95c..c3cac28d665 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -287,6 +287,9 @@ Bug Fixes * SOLR-9699,SOLR-4668: fix exception from core status in parallel with core reload (Mikhail Khludnev) +* SOLR-9859: replication.properties cannot be updated after being written and neither replication.properties or + index.properties are durable in the face of a crash. 
(Pushkar Raste, Chris de Kok, Cao Manh Dat, Mark Miller) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java index 228260a3a95..396a30dc75d 100644 --- a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java @@ -19,6 +19,7 @@ package org.apache.solr.core; import java.io.Closeable; import java.io.File; import java.io.FileFilter; +import java.io.FileNotFoundException; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.Collection; @@ -184,6 +185,20 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin, fromDir.deleteFile(fileName); } + // sub classes perform an atomic rename if possible, otherwise fall back to delete + rename + // this is important to support for index roll over durability after crashes + public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException { + try { + dir.deleteFile(toName); + } catch (FileNotFoundException e) { + + } catch (Exception e) { + log.error("Exception deleting file", e); + } + + dir.rename(fileName, toName); + } + /** * Returns the Directory for a given path, using the specified rawLockType. * Will return the same Directory instance for the same path. diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java index b003287aa41..d481e0333e0 100644 --- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java @@ -29,8 +29,10 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.security.UserGroupInformation; @@ -568,4 +570,11 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol } } } + + // perform an atomic rename if possible + public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException { + String hdfsDirPath = getPath(dir); + FileContext fileContext = FileContext.getFileContext(getConf()); + fileContext.rename(new Path(hdfsDirPath + "/" + fileName), new Path(hdfsDirPath + "/" + toName), Options.Rename.OVERWRITE); + } } diff --git a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java index 532655bc30e..d4181370aae 100644 --- a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java @@ -18,6 +18,11 @@ package org.apache.solr.core; import java.io.File; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.nio.file.AtomicMoveNotSupportedException; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.Locale; import org.apache.commons.io.FileUtils; @@ -151,5 +156,22 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory { return baseDir; } + + // perform an atomic rename if possible + 
public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException { + Directory baseDir = getBaseDir(dir); + if (baseDir instanceof FSDirectory) { + Path path = ((FSDirectory) baseDir).getDirectory().toAbsolutePath(); + try { + Files.move(FileSystems.getDefault().getPath(path.toString(), fileName), + FileSystems.getDefault().getPath(path.toString(), toName), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (AtomicMoveNotSupportedException e) { + Files.move(FileSystems.getDefault().getPath(path.toString(), fileName), + FileSystems.getDefault().getPath(path.toString(), toName), StandardCopyOption.REPLACE_EXISTING); + } + } else { + super.renameWithOverwrite(dir, fileName, toName); + } + } } diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java index bdbd4e797ad..8bdd2b8009f 100644 --- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java +++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java @@ -685,15 +685,19 @@ public class IndexFetcher { sb = readToStringBuilder(replicationTime, props.getProperty(REPLICATION_FAILED_AT_LIST)); props.setProperty(REPLICATION_FAILED_AT_LIST, sb.toString()); } - - final IndexOutput out = dir.createOutput(REPLICATION_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE); + + + String tmpFileName = REPLICATION_PROPERTIES + "." + System.nanoTime(); + final IndexOutput out = dir.createOutput(tmpFileName, DirectoryFactory.IOCONTEXT_NO_CACHE); Writer outFile = new OutputStreamWriter(new PropertiesOutputStream(out), StandardCharsets.UTF_8); try { props.store(outFile, "Replication details"); - dir.sync(Collections.singleton(REPLICATION_PROPERTIES)); + dir.sync(Collections.singleton(tmpFileName)); } finally { IOUtils.closeQuietly(outFile); } + + solrCore.getDirectoryFactory().renameWithOverwrite(dir, tmpFileName, REPLICATION_PROPERTIES); } catch (Exception e) { LOG.warn("Exception while updating statistics", e); } finally { @@ -1206,24 +1210,23 @@ public class IndexFetcher { IOUtils.closeQuietly(is); } } - try { - dir.deleteFile(IndexFetcher.INDEX_PROPERTIES); - } catch (IOException e) { - // no problem - } - final IndexOutput out = dir.createOutput(IndexFetcher.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE); + + String tmpFileName = IndexFetcher.INDEX_PROPERTIES + "." 
+ System.nanoTime(); + final IndexOutput out = dir.createOutput(tmpFileName, DirectoryFactory.IOCONTEXT_NO_CACHE); p.put("index", tmpIdxDirName); Writer os = null; try { os = new OutputStreamWriter(new PropertiesOutputStream(out), StandardCharsets.UTF_8); - p.store(os, IndexFetcher.INDEX_PROPERTIES); - dir.sync(Collections.singleton(INDEX_PROPERTIES)); + p.store(os, tmpFileName); + dir.sync(Collections.singleton(tmpFileName)); } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to write " + IndexFetcher.INDEX_PROPERTIES, e); } finally { IOUtils.closeQuietly(os); } + + solrCore.getDirectoryFactory().renameWithOverwrite(dir, tmpFileName, IndexFetcher.INDEX_PROPERTIES); return true; } catch (IOException e1) { diff --git a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java index 08c462bc70b..685ef9987f4 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java +++ b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java @@ -35,6 +35,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; +import java.util.List; import java.util.Properties; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -304,7 +305,11 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { // check details on the slave a couple of times before & after fetching for (int i = 0; i < 3; i++) { NamedList details = getDetails(slaveClient); - + List replicatedAtCount = (List) ((NamedList) details.get("slave")).get("indexReplicatedAtList"); + if (i > 0) { + assertEquals(i, replicatedAtCount.size()); + } + assertEquals("slave isMaster?", "false", details.get("isMaster")); assertEquals("slave isSlave?", From 262049fc8f60a166f0eed0aef5d7ddd1e7c90bc7 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Wed, 28 Dec 2016 17:42:41 -0500 Subject: [PATCH 40/83] SOLR-9899: StandardDirectoryFactory should use optimizations for all FilterDirectorys not just NRTCachingDirectory. --- solr/CHANGES.txt | 3 +++ .../org/apache/solr/core/DirectoryFactory.java | 11 +++++++++++ .../solr/core/StandardDirectoryFactory.java | 15 --------------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c3cac28d665..501527addea 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -343,6 +343,9 @@ Other Changes * SOLR-9448: providing a test to workaround a differently named uniqueKey field (Mikhail Khludnev) +* SOLR-9899: StandardDirectoryFactory should use optimizations for all FilterDirectorys not just NRTCachingDirectory. + (Mark Miller) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. 
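For context before the diffs below: the NRTCachingDirectory-specific unwrapping in
StandardDirectoryFactory is generalized and moved up to DirectoryFactory, so the rename/move
optimizations work behind any FilterDirectory wrapper. Roughly, the relocated helper (a sketch
mirroring the diff, not a new API):

    // Walk down any chain of FilterDirectory wrappers (NRTCachingDirectory is one such
    // wrapper) to the concrete Directory, e.g. an FSDirectory, before applying
    // filesystem-level optimizations such as an atomic rename.
    protected Directory getBaseDir(Directory dir) {
      Directory baseDir = dir;
      while (baseDir instanceof FilterDirectory) {
        baseDir = ((FilterDirectory) baseDir).getDelegate();
      }
      return baseDir;
    }
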
diff --git a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java index 396a30dc75d..136a0a6fff5 100644 --- a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java @@ -27,6 +27,7 @@ import java.util.Collections; import org.apache.commons.io.FileUtils; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.LockFactory; @@ -371,4 +372,14 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin, public void initCoreContainer(CoreContainer cc) { this.coreContainer = cc; } + + // special hack to work with FilterDirectory + protected Directory getBaseDir(Directory dir) { + Directory baseDir = dir; + while (baseDir instanceof FilterDirectory) { + baseDir = ((FilterDirectory)baseDir).getDelegate(); + } + + return baseDir; + } } diff --git a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java index d4181370aae..b24be143153 100644 --- a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java @@ -30,7 +30,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.LockFactory; -import org.apache.lucene.store.NRTCachingDirectory; import org.apache.lucene.store.NativeFSLockFactory; import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.SimpleFSLockFactory; @@ -116,8 +115,6 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory { * carefully - some Directory wrappers will * cache files for example. * - * This implementation works with NRTCachingDirectory. - * * You should first {@link Directory#sync(java.util.Collection)} any file that will be * moved or avoid cached files through settings. * @@ -144,18 +141,6 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory { super.move(fromDir, toDir, fileName, ioContext); } - - // special hack to work with NRTCachingDirectory - private Directory getBaseDir(Directory dir) { - Directory baseDir; - if (dir instanceof NRTCachingDirectory) { - baseDir = ((NRTCachingDirectory)dir).getDelegate(); - } else { - baseDir = dir; - } - - return baseDir; - } // perform an atomic rename if possible public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException { From f29d2b5668296dfcdb8d650305449674faa29847 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Thu, 29 Dec 2016 01:56:23 +0100 Subject: [PATCH 41/83] LUCENE-7595: Improve RAMUsageTester in test-framework to estimate memory usage of runtime classes and work with Java 9 EA (b148+). 
Disable static field heap usage checker in LuceneTestCase --- lucene/CHANGES.txt | 4 + .../lucene/search/TestLRUQueryCache.java | 3 +- .../apache/lucene/util/LuceneTestCase.java | 90 ++++++++-------- .../apache/lucene/util/RamUsageTester.java | 102 +++++++++++++++--- 4 files changed, 139 insertions(+), 60 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 255867d68c3..7a118f1d8c2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -208,6 +208,10 @@ Other * LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and Function interfaces. (Ahmet Arslan via Adrien Grand) +* LUCENE-7595: Improve RAMUsageTester in test-framework to estimate memory usage of + runtime classes and work with Java 9 EA (b148+). Disable static field heap usage + checker in LuceneTestCase. (Uwe Schindler, Dawid Weiss) + Build * LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 87382f98387..9ebacf7292a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -265,6 +265,8 @@ public class TestLRUQueryCache extends LuceneTestCase { // This test makes sure that by making the same assumptions as LRUQueryCache, RAMUsageTester // computes the same memory usage. public void testRamBytesUsedAgreesWithRamUsageTester() throws IOException { + assumeFalse("LUCENE-7595: RamUsageTester does not work exact in Java 9 (estimations for maps and lists)", Constants.JRE_IS_MINIMUM_JAVA9); + final LRUQueryCache queryCache = new LRUQueryCache(1 + random().nextInt(5), 1 + random().nextInt(10000), context -> random().nextBoolean()); // an accumulator that only sums up memory usage of referenced filters and doc id sets final RamUsageTester.Accumulator acc = new RamUsageTester.Accumulator() { @@ -379,7 +381,6 @@ public class TestLRUQueryCache extends LuceneTestCase { // by the cache itself, not cache entries, and we want to make sure that // memory usage is not grossly underestimated. public void testRamBytesUsedConstantEntryOverhead() throws IOException { - LuceneTestCase.assumeFalse("RamUsageTester does not fully work on Java 9", Constants.JRE_IS_MINIMUM_JAVA9); final LRUQueryCache queryCache = new LRUQueryCache(1000000, 10000000, context -> true); final RamUsageTester.Accumulator acc = new RamUsageTester.Accumulator() { diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 1848c4e7964..50139a099de 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -598,51 +598,55 @@ public abstract class LuceneTestCase extends Assert { * other. 
*/ @ClassRule - public static TestRule classRules = RuleChain - .outerRule(new TestRuleIgnoreTestSuites()) - .around(ignoreAfterMaxFailures) - .around(suiteFailureMarker = new TestRuleMarkFailure()) - .around(new TestRuleAssertionsRequired()) - .around(new TestRuleLimitSysouts(suiteFailureMarker)) - .around(tempFilesCleanupRule = new TestRuleTemporaryFilesCleanup(suiteFailureMarker)) - .around(new StaticFieldsInvariantRule(STATIC_LEAK_THRESHOLD, true) { - @Override - protected boolean accept(java.lang.reflect.Field field) { - // Don't count known classes that consume memory once. - if (STATIC_LEAK_IGNORED_TYPES.contains(field.getType().getName())) { - return false; + public static TestRule classRules; + static { + RuleChain r = RuleChain.outerRule(new TestRuleIgnoreTestSuites()) + .around(ignoreAfterMaxFailures) + .around(suiteFailureMarker = new TestRuleMarkFailure()) + .around(new TestRuleAssertionsRequired()) + .around(new TestRuleLimitSysouts(suiteFailureMarker)) + .around(tempFilesCleanupRule = new TestRuleTemporaryFilesCleanup(suiteFailureMarker)); + // TODO LUCENE-7595: Java 9 does not allow to look into runtime classes, so we have to fix the RAM usage checker! + if (!Constants.JRE_IS_MINIMUM_JAVA9) { + r = r.around(new StaticFieldsInvariantRule(STATIC_LEAK_THRESHOLD, true) { + @Override + protected boolean accept(java.lang.reflect.Field field) { + // Don't count known classes that consume memory once. + if (STATIC_LEAK_IGNORED_TYPES.contains(field.getType().getName())) { + return false; + } + // Don't count references from ourselves, we're top-level. + if (field.getDeclaringClass() == LuceneTestCase.class) { + return false; + } + return super.accept(field); } - // Don't count references from ourselves, we're top-level. - if (field.getDeclaringClass() == LuceneTestCase.class) { - return false; + }); + } + classRules = r.around(new NoClassHooksShadowingRule()) + .around(new NoInstanceHooksOverridesRule() { + @Override + protected boolean verify(Method key) { + String name = key.getName(); + return !(name.equals("setUp") || name.equals("tearDown")); } - return super.accept(field); - } - }) - .around(new NoClassHooksShadowingRule()) - .around(new NoInstanceHooksOverridesRule() { - @Override - protected boolean verify(Method key) { - String name = key.getName(); - return !(name.equals("setUp") || name.equals("tearDown")); - } - }) - .around(classNameRule = new TestRuleStoreClassName()) - .around(new TestRuleRestoreSystemProperties( - // Enlist all properties to which we have write access (security manager); - // these should be restored to previous state, no matter what the outcome of the test. - - // We reset the default locale and timezone; these properties change as a side-effect - "user.language", - "user.timezone", - - // TODO: these should, ideally, be moved to Solr's base class. - "solr.directoryFactory", - "solr.solr.home", - "solr.data.dir" - )) - .around(classEnvRule = new TestRuleSetupAndRestoreClassEnv()); - + }) + .around(classNameRule = new TestRuleStoreClassName()) + .around(new TestRuleRestoreSystemProperties( + // Enlist all properties to which we have write access (security manager); + // these should be restored to previous state, no matter what the outcome of the test. + + // We reset the default locale and timezone; these properties change as a side-effect + "user.language", + "user.timezone", + + // TODO: these should, ideally, be moved to Solr's base class. 
+ "solr.directoryFactory", + "solr.solr.home", + "solr.data.dir" + )) + .around(classEnvRule = new TestRuleSetupAndRestoreClassEnv()); + } // ----------------------------------------------------------------- // Test level rules. diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java index 985052654c1..daf81a96b35 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java @@ -16,9 +16,12 @@ */ package org.apache.lucene.util; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.nio.file.Path; import java.security.AccessController; import java.security.PrivilegedAction; import java.util.AbstractList; @@ -30,6 +33,10 @@ import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.ToLongFunction; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; /** Crawls object graph to collect RAM usage for testing */ public final class RamUsageTester { @@ -40,9 +47,7 @@ public final class RamUsageTester { /** Accumulate transitive references for the provided fields of the given * object into queue and return the shallow size of this object. */ public long accumulateObject(Object o, long shallowSize, Map fieldValues, Collection queue) { - for (Object value : fieldValues.values()) { - queue.add(value); - } + queue.addAll(fieldValues.values()); return shallowSize; } @@ -130,10 +135,10 @@ public final class RamUsageTester { @Override public int size() { return len; - } - - }; - } + } + + }; + } totalSize += accumulator.accumulateArray(ob, shallowSize, values, stack); } else { /* @@ -145,13 +150,36 @@ public final class RamUsageTester { if (cachedInfo == null) { classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz)); } - - Map fieldValues = new HashMap<>(); - for (Field f : cachedInfo.referenceFields) { - fieldValues.put(f, f.get(ob)); + + boolean needsReflection = true; + if (Constants.JRE_IS_MINIMUM_JAVA9) { + // Java 9: Best guess for some known types, as we cannot precisely look into runtime classes: + final ToLongFunction func = SIMPLE_TYPES.get(obClazz); + if (func != null) { // some simple type like String where the size is easy to get from public properties + totalSize += accumulator.accumulateObject(ob, cachedInfo.alignedShallowInstanceSize + func.applyAsLong(ob), + Collections.emptyMap(), stack); + needsReflection = false; + } else if (ob instanceof Iterable) { + final List values = StreamSupport.stream(((Iterable) ob).spliterator(), false) + .collect(Collectors.toList()); + totalSize += accumulator.accumulateArray(ob, cachedInfo.alignedShallowInstanceSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER, values, stack); + needsReflection = false; + } else if (ob instanceof Map) { + final List values = ((Map) ob).entrySet().stream() + .flatMap(e -> Stream.of(e.getKey(), e.getValue())) + .collect(Collectors.toList()); + totalSize += accumulator.accumulateArray(ob, cachedInfo.alignedShallowInstanceSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER, values, stack); + totalSize += RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; + needsReflection = false; + } + } + if (needsReflection) { + final Map fieldValues = new HashMap<>(); + for (Field f : 
cachedInfo.referenceFields) { + fieldValues.put(f, f.get(ob)); + } + totalSize += accumulator.accumulateObject(ob, cachedInfo.alignedShallowInstanceSize, fieldValues, stack); } - - totalSize += accumulator.accumulateObject(ob, cachedInfo.alignedShallowInstanceSize, fieldValues, stack); } catch (IllegalAccessException e) { // this should never happen as we enabled setAccessible(). throw new RuntimeException("Reflective field access failed?", e); @@ -167,7 +195,41 @@ public final class RamUsageTester { return totalSize; } - + /** + * This map contains a function to calculate sizes of some "simple types" like String just from their public properties. + * This is needed for Java 9, which does not allow to look into runtime class fields. + */ + @SuppressWarnings("serial") + private static final Map, ToLongFunction> SIMPLE_TYPES = Collections.unmodifiableMap(new IdentityHashMap, ToLongFunction>() { + { init(); } + + @SuppressForbidden(reason = "We measure some forbidden classes") + private void init() { + // String types: + a(String.class, v -> charArraySize(v.length())); // may not be correct with Java 9's compact strings! + a(StringBuilder.class, v -> charArraySize(v.capacity())); + a(StringBuffer.class, v -> charArraySize(v.capacity())); + // Types with large buffers: + a(ByteArrayOutputStream.class, v -> byteArraySize(v.size())); + // For File and Path, we just take the length of String representation as approximation: + a(File.class, v -> charArraySize(v.toString().length())); + a(Path.class, v -> charArraySize(v.toString().length())); + } + + @SuppressWarnings("unchecked") + private void a(Class clazz, ToLongFunction func) { + put(clazz, (ToLongFunction) func); + } + + private long charArraySize(int len) { + return RamUsageEstimator.alignObjectSize((long)RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long)Character.BYTES * len); + } + + private long byteArraySize(int len) { + return RamUsageEstimator.alignObjectSize((long)RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + len); + } + }); + /** * Cached information about a given class. */ @@ -202,8 +264,16 @@ public final class RamUsageTester { shallowInstanceSize = RamUsageEstimator.adjustForField(shallowInstanceSize, f); if (!f.getType().isPrimitive()) { - f.setAccessible(true); - referenceFields.add(f); + try { + f.setAccessible(true); + referenceFields.add(f); + } catch (RuntimeException re) { + if ("java.lang.reflect.InaccessibleObjectException".equals(re.getClass().getName())) { + // LUCENE-7595: this is Java 9, which prevents access to fields in foreign modules + } else { + throw re; + } + } } } } From 20362deb7e6814c1922163595e7edeb652d3ce37 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Wed, 28 Dec 2016 22:57:44 -0500 Subject: [PATCH 42/83] SOLR-9897: Add hl.requireFieldMatch=false support when using the UnifiedHighlighter --- solr/CHANGES.txt | 7 +++++-- .../solr/highlight/UnifiedSolrHighlighter.java | 15 ++++++++++++++- .../highlight/TestUnifiedSolrHighlighter.java | 10 ++++++++++ .../solr/common/params/HighlightParams.java | 2 +- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 501527addea..852a30680df 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -109,8 +109,8 @@ Upgrade Notes * SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the - original Highlighter. 
That said, some options aren't supported yet, notably hl.fragsize and - hl.requireFieldMatch=false. It will get more features in time, especially with your input. See HighlightParams.java + original Highlighter. That said, some options aren't supported yet, notably hl.fragsize. + It will get more features in time, especially with your input. See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector. @@ -199,6 +199,9 @@ New Features * SOLR-9880: Add Ganglia, Graphite and SLF4J metrics reporters. (ab) +* SOLR-9897: Add hl.requireFieldMatch toggle support when using the UnifiedHighlighter. Defaults to false like the + other highlighters that support this. (David Smiley) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java index c38546ee116..910fa2b8d75 100644 --- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.function.Predicate; import org.apache.lucene.document.Document; import org.apache.lucene.search.DocIdSetIterator; @@ -221,9 +222,10 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf * From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}. */ protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter { + protected final static Predicate NOT_REQUIRED_FIELD_MATCH_PREDICATE = s -> true; protected final SolrParams params; - protected final IndexSchema schema; + protected final IndexSchema schema; protected final RTimerTree loadFieldValuesTimer; public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) { @@ -360,6 +362,17 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true); } + @Override + protected Predicate getFieldMatcher(String field) { + // TODO define hl.queryFieldPattern as a more advanced alternative to hl.requireFieldMatch. + + // note that the UH & PH at Lucene level default to effectively "true" + if (params.getFieldBool(field, HighlightParams.FIELD_MATCH, false)) { + return field::equals; // requireFieldMatch + } else { + return NOT_REQUIRED_FIELD_MATCH_PREDICATE; + } + } } } \ No newline at end of file diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java index 95754a4ac1e..e2511bef53d 100644 --- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java +++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java @@ -225,5 +225,15 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 { req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"), "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='Document one has a first <i>sentence</i>.'"); } + + public void testRequireFieldMatch() { + // We highlight on field text3 (hl.fl), but our query only references the "text" field. 
Nonetheless, the query word + // "document" is found in all fields here. + + assertQ(req("q", "id:101", "hl", "true", "hl.q", "text:document", "hl.fl", "text3"), //hl.requireFieldMatch is false by default + "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/*)=1"); + assertQ(req("q", "id:101", "hl", "true", "hl.q", "text:document", "hl.fl", "text3", "hl.requireFieldMatch", "true"), + "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/*)=0"); + } } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java index fd752bfc60a..917e9f57926 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java @@ -38,7 +38,7 @@ public interface HighlightParams { // query interpretation public static final String Q = HIGHLIGHT+".q"; // all public static final String QPARSER = HIGHLIGHT+".qparser"; // all - public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch"; // OH, FVH + public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch"; // OH, FVH, UH public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter"; // OH, FVH, UH public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm"; // all From 662be93ed11abebaff1d13711f3bffca478ba61e Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Thu, 29 Dec 2016 09:57:03 +0530 Subject: [PATCH 43/83] SOLR-9877: Null check for metric registry before attempting to use it --- .../solr/util/stats/InstrumentedHttpRequestExecutor.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java index ad76d734361..04267802160 100644 --- a/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java +++ b/solr/core/src/java/org/apache/solr/util/stats/InstrumentedHttpRequestExecutor.java @@ -53,11 +53,16 @@ public class InstrumentedHttpRequestExecutor extends HttpRequestExecutor impleme @Override public HttpResponse execute(HttpRequest request, HttpClientConnection conn, HttpContext context) throws IOException, HttpException { - final Timer.Context timerContext = timer(request).time(); + Timer.Context timerContext = null; + if (metricsRegistry != null) { + timerContext = timer(request).time(); + } try { return super.execute(request, conn, context); } finally { - timerContext.stop(); + if (timerContext != null) { + timerContext.stop(); + } } } From 2781145eb3760489922530fd92d5f1d4c35215a9 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Thu, 29 Dec 2016 05:29:51 -0500 Subject: [PATCH 44/83] SOLR-9902: StandardDirectoryFactory should use Files API for it's move implementation. --- solr/CHANGES.txt | 1 + .../solr/core/StandardDirectoryFactory.java | 20 ++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 852a30680df..06566e069e4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -222,6 +222,7 @@ Optimizations resulting in less produced garbage and 5-7% better performance. (yonik) +* SOLR-9902: StandardDirectoryFactory should use Files API for it's move implementation. 
(Mark Miller) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java index b24be143153..1d8793afc68 100644 --- a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java @@ -129,13 +129,14 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory { Directory baseToDir = getBaseDir(toDir); if (baseFromDir instanceof FSDirectory && baseToDir instanceof FSDirectory) { - File dir1 = ((FSDirectory) baseFromDir).getDirectory().toFile(); - File dir2 = ((FSDirectory) baseToDir).getDirectory().toFile(); - File indexFileInTmpDir = new File(dir1, fileName); - File indexFileInIndex = new File(dir2, fileName); - boolean success = indexFileInTmpDir.renameTo(indexFileInIndex); - if (success) { - return; + + Path path1 = ((FSDirectory) baseFromDir).getDirectory().toAbsolutePath(); + Path path2 = ((FSDirectory) baseFromDir).getDirectory().toAbsolutePath(); + + try { + Files.move(path1.resolve(fileName), path2.resolve(fileName), StandardCopyOption.ATOMIC_MOVE); + } catch (AtomicMoveNotSupportedException e) { + Files.move(path1.resolve(fileName), path2.resolve(fileName)); } } @@ -148,8 +149,9 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory { if (baseDir instanceof FSDirectory) { Path path = ((FSDirectory) baseDir).getDirectory().toAbsolutePath(); try { - Files.move(FileSystems.getDefault().getPath(path.toString(), fileName), - FileSystems.getDefault().getPath(path.toString(), toName), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + Files.move(path.resolve(fileName), + path.resolve(toName), StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); } catch (AtomicMoveNotSupportedException e) { Files.move(FileSystems.getDefault().getPath(path.toString(), fileName), FileSystems.getDefault().getPath(path.toString(), toName), StandardCopyOption.REPLACE_EXISTING); From a5e5c4a04385eb030aac1ec6126ff9b82407158f Mon Sep 17 00:00:00 2001 From: markrmiller Date: Thu, 29 Dec 2016 05:40:45 -0500 Subject: [PATCH 45/83] tests: bump up fudge --- .../src/test/org/apache/solr/update/SoftAutoCommitTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java b/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java index 4106884a8e2..c9c96915300 100644 --- a/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java +++ b/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java @@ -107,7 +107,7 @@ public class SoftAutoCommitTest extends AbstractSolrTestCase { monitor.assertSaneOffers(); // Wait for the soft commit with some fudge - Long soft529 = monitor.soft.poll(softCommitWaitMillis * 2, MILLISECONDS); + Long soft529 = monitor.soft.poll(softCommitWaitMillis * 3, MILLISECONDS); assertNotNull("soft529 wasn't fast enough", soft529); monitor.assertSaneOffers(); From 197590a928cfefa51b1a8307046e5a11e5400e34 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Wed, 28 Dec 2016 16:16:14 -0500 Subject: [PATCH 46/83] SOLR-9901: Implement move in HdfsDirectoryFactory. 
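With this change, moving an index file between two HDFS-backed directories becomes a single
FileContext.rename on the NameNode instead of the generic copy-and-delete in DirectoryFactory.move.
A rough, hypothetical caller-side sketch follows (the paths, lock type, and file name are made up for
illustration; it assumes a SolrCore in scope plus the usual org.apache.solr.core.DirectoryFactory and
org.apache.lucene.store.IOContext imports) — it is not part of this patch, only an example of how the
factory-level move is exercised:

    // Sketch: when both directories unwrap to HdfsDirectory instances, move()
    // performs an HDFS rename; otherwise it falls back to DirectoryFactory.move
    // (copy then delete). Names below are hypothetical.
    DirectoryFactory factory = core.getDirectoryFactory();
    Directory tmpDir = factory.get("/solr/data/index.tmp", DirectoryFactory.DirContext.DEFAULT, "hdfs");
    Directory indexDir = factory.get("/solr/data/index", DirectoryFactory.DirContext.DEFAULT, "hdfs");
    try {
      factory.move(tmpDir, indexDir, "segments_5", IOContext.DEFAULT);
    } finally {
      factory.release(tmpDir);
      factory.release(indexDir);
    }
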
--- solr/CHANGES.txt | 2 ++ .../solr/core/HdfsDirectoryFactory.java | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 06566e069e4..138385940ea 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -294,6 +294,8 @@ Bug Fixes * SOLR-9859: replication.properties cannot be updated after being written and neither replication.properties or index.properties are durable in the face of a crash. (Pushkar Raste, Chris de Kok, Cao Manh Dat, Mark Miller) +* SOLR-9901: Implement move in HdfsDirectoryFactory. (Mark Miller) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java index d481e0333e0..e1e3d6ed5ff 100644 --- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.security.UserGroupInformation; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.LockFactory; import org.apache.lucene.store.NRTCachingDirectory; import org.apache.lucene.store.NoLockFactory; @@ -577,4 +578,23 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol FileContext fileContext = FileContext.getFileContext(getConf()); fileContext.rename(new Path(hdfsDirPath + "/" + fileName), new Path(hdfsDirPath + "/" + toName), Options.Rename.OVERWRITE); } + + @Override + public void move(Directory fromDir, Directory toDir, String fileName, IOContext ioContext) throws IOException { + + Directory baseFromDir = getBaseDir(fromDir); + Directory baseToDir = getBaseDir(toDir); + + if (baseFromDir instanceof HdfsDirectory && baseToDir instanceof HdfsDirectory) { + Path dir1 = ((HdfsDirectory) baseFromDir).getHdfsDirPath(); + Path dir2 = ((HdfsDirectory) baseToDir).getHdfsDirPath(); + Path file1 = new Path(dir1, fileName); + Path file2 = new Path(dir2, fileName); + FileContext fileContext = FileContext.getFileContext(getConf()); + fileContext.rename(file1, file2); + return; + } + + super.move(fromDir, toDir, fileName, ioContext); + } } From 5f55ae0b73ec546132f7188490065798bba0ffad Mon Sep 17 00:00:00 2001 From: markrmiller Date: Thu, 29 Dec 2016 05:53:51 -0500 Subject: [PATCH 47/83] tests: raise commit time to avoid false fails --- solr/core/src/test/org/apache/solr/update/AutoCommitTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/update/AutoCommitTest.java b/solr/core/src/test/org/apache/solr/update/AutoCommitTest.java index f68e56309a6..cb030e4db57 100644 --- a/solr/core/src/test/org/apache/solr/update/AutoCommitTest.java +++ b/solr/core/src/test/org/apache/solr/update/AutoCommitTest.java @@ -239,7 +239,7 @@ public class AutoCommitTest extends AbstractSolrTestCase { CommitTracker tracker = updater.softCommitTracker; // too low of a number can cause a slow host to commit before the test code checks that it // isn't there... 
causing a failure at "shouldn't find any" - tracker.setTimeUpperBound(1000); + tracker.setTimeUpperBound(1500); tracker.setDocsUpperBound(-1); // updater.commitCallbacks.add(trigger); From b4de6288fb739b53ad138a16bc862130dc9318a8 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Thu, 29 Dec 2016 05:59:25 -0500 Subject: [PATCH 48/83] tests: bump timeout --- .../org/apache/solr/cloud/LeaderFailoverAfterPartitionTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderFailoverAfterPartitionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderFailoverAfterPartitionTest.java index 9f1abdedc6d..f172267281d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/LeaderFailoverAfterPartitionTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/LeaderFailoverAfterPartitionTest.java @@ -159,7 +159,7 @@ public class LeaderFailoverAfterPartitionTest extends HttpPartitionTest { proxy0.reopen(); - long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS); + long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(90, TimeUnit.SECONDS); while (System.nanoTime() < timeout) { List activeReps = getActiveOrRecoveringReplicas(testCollectionName, "shard1"); if (activeReps.size() >= 2) break; From c58eaa1a49bf518f3b0f70701ffd31f0cca79c17 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Wed, 28 Dec 2016 08:36:08 -0500 Subject: [PATCH 49/83] tests: speed up very slow test --- .../apache/solr/cloud/CdcrBootstrapTest.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java b/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java index aca5e0bcd61..6959bd825d5 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java @@ -76,11 +76,11 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { .withProperty("solr.directoryFactory", "solr.StandardDirectoryFactory") .process(source.getSolrClient()); - // index 10000 docs with a hard commit every 1000 documents CloudSolrClient sourceSolrClient = source.getSolrClient(); sourceSolrClient.setDefaultCollection("cdcr-source"); + int docs = (TEST_NIGHTLY ? 100 : 10); int numDocs = 0; - for (int k = 0; k < 100; k++) { + for (int k = 0; k < docs; k++) { UpdateRequest req = new UpdateRequest(); for (; numDocs < (k + 1) * 100; numDocs++) { SolrInputDocument doc = new SolrInputDocument(); @@ -89,7 +89,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { req.add(doc); } req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); - System.out.println("Adding 100 docs with commit=true, numDocs=" + numDocs); + System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs); req.process(sourceSolrClient); } @@ -170,11 +170,11 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { .withProperty("solr.directoryFactory", "solr.StandardDirectoryFactory") .process(source.getSolrClient()); - // index 10000 docs with a hard commit every 1000 documents CloudSolrClient sourceSolrClient = source.getSolrClient(); sourceSolrClient.setDefaultCollection("cdcr-source"); + int docs = (TEST_NIGHTLY ? 
100 : 10); int numDocs = 0; - for (int k = 0; k < 100; k++) { + for (int k = 0; k < docs; k++) { UpdateRequest req = new UpdateRequest(); for (; numDocs < (k + 1) * 100; numDocs++) { SolrInputDocument doc = new SolrInputDocument(); @@ -183,7 +183,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { req.add(doc); } req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); - System.out.println("Adding 100 docs with commit=true, numDocs=" + numDocs); + System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs); req.process(sourceSolrClient); } @@ -209,7 +209,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { cdcrDisableBuffer(sourceSolrClient); int c = 0; - for (int k = 0; k < 100; k++) { + for (int k = 0; k < 10; k++) { UpdateRequest req = new UpdateRequest(); for (; c < (k + 1) * 100; c++, numDocs++) { SolrInputDocument doc = new SolrInputDocument(); @@ -256,11 +256,11 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { .withProperty("solr.directoryFactory", "solr.StandardDirectoryFactory") .process(source.getSolrClient()); - // index 10000 docs with a hard commit every 1000 documents CloudSolrClient sourceSolrClient = source.getSolrClient(); sourceSolrClient.setDefaultCollection("cdcr-source"); + int docs = (TEST_NIGHTLY ? 100 : 10); int numDocs = 0; - for (int k = 0; k < 100; k++) { + for (int k = 0; k < docs; k++) { UpdateRequest req = new UpdateRequest(); for (; numDocs < (k + 1) * 100; numDocs++) { SolrInputDocument doc = new SolrInputDocument(); @@ -269,7 +269,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { req.add(doc); } req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); - System.out.println("Adding 100 docs with commit=true, numDocs=" + numDocs); + System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs); req.process(sourceSolrClient); } @@ -286,9 +286,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { cdcrStart(targetSolrClient); cdcrStart(sourceSolrClient); - int c = 0; - for (int k = 0; k < 100; k++) { + for (int k = 0; k < docs; k++) { UpdateRequest req = new UpdateRequest(); for (; c < (k + 1) * 100; c++, numDocs++) { SolrInputDocument doc = new SolrInputDocument(); @@ -297,7 +296,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { req.add(doc); } req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); - System.out.println("Adding 100 docs with commit=true, numDocs=" + numDocs); + System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs); req.process(sourceSolrClient); } From fa959ad25d2460ebb41fae6bcf496a5ce785e989 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Thu, 29 Dec 2016 06:42:14 -0500 Subject: [PATCH 50/83] tests: speed up non nightly run --- .../src/test/org/apache/solr/cloud/TestStressLiveNodes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java b/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java index 80b717eb315..52ab3a3def2 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java @@ -132,7 +132,7 @@ public class TestStressLiveNodes extends SolrCloudTestCase { public void testStress() throws Exception { // do many iters, so we have "bursts" of adding nodes that we then check - final int numIters = atLeast(1000); + final int numIters = atLeast(TEST_NIGHTLY ? 
1000 : 100); for (int iter = 0; iter < numIters; iter++) { // sanity check that ZK says there is in fact 1 live node From 12aff1cfcc48d7c89008447d482bf610242e0431 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 27 Oct 2016 16:50:28 +0100 Subject: [PATCH 51/83] SOLR-9132: Cut over some more tests --- .../java/org/apache/solr/cloud/Overseer.java | 2 +- .../solr/cloud/CollectionsAPISolrJTest.java | 470 +++++++----------- .../DeleteLastCustomShardedReplicaTest.java | 104 +--- .../apache/solr/cloud/DeleteShardTest.java | 205 +++----- .../cloud/OverseerModifyCollectionTest.java | 92 ++-- .../apache/solr/cloud/OverseerRolesTest.java | 165 ++---- .../apache/solr/cloud/OverseerStatusTest.java | 55 +- .../solr/cloud/RemoteQueryErrorTest.java | 53 +- .../cloud/TestDownShardTolerantSearch.java | 40 +- .../TestExclusionRuleCollectionAccess.java | 36 +- .../PKIAuthenticationIntegrationTest.java | 40 +- .../solrj/request/CollectionAdminRequest.java | 2 + 12 files changed, 453 insertions(+), 811 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index d7285fa20a6..a618874f3ba 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -371,7 +371,7 @@ public class Overseer implements Closeable { return Collections.singletonList(new SliceMutator(getZkStateReader()).updateShardState(clusterState, message)); case QUIT: if (myId.equals(message.get("id"))) { - log.info("Quit command received {}", LeaderElector.getNodeName(myId)); + log.info("Quit command received {} {}", message, LeaderElector.getNodeName(myId)); overseerCollectionConfigSetProcessor.close(); close(); } else { diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java index b04bfbc3ffa..616b657344d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java @@ -16,70 +16,47 @@ */ package org.apache.solr.cloud; -import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.Properties; -import java.util.concurrent.TimeUnit; -import org.apache.commons.codec.binary.StringUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.request.CoreAdminRequest; +import org.apache.solr.client.solrj.request.CoreStatus; import org.apache.solr.client.solrj.response.CollectionAdminResponse; -import org.apache.solr.client.solrj.response.CoreAdminResponse; -import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.ClusterProperties; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.util.NamedList; -import org.apache.solr.util.TimeOut; import org.apache.zookeeper.KeeperException; +import 
org.junit.BeforeClass; import org.junit.Test; -import static org.apache.solr.cloud.ReplicaPropertiesBase.verifyUniqueAcrossCollection; - @LuceneTestCase.Slow -public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { +public class CollectionsAPISolrJTest extends SolrCloudTestCase { - @Test - public void test() throws Exception { - testCreateAndDeleteCollection(); - testCreateAndDeleteShard(); - testReloadCollection(); - testCreateAndDeleteAlias(); - testSplitShard(); - testCreateCollectionWithPropertyParam(); - testAddAndDeleteReplica(); - testClusterProp(); - testAddAndRemoveRole(); - testOverseerStatus(); - testList(); - testAddAndDeleteReplicaProp(); - testBalanceShardUnique(); + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(4) + .addConfig("conf", configset("cloud-minimal")) + .configure(); } - protected void testCreateAndDeleteCollection() throws Exception { + @Test + public void testCreateAndDeleteCollection() throws Exception { String collectionName = "solrj_test"; - CollectionAdminRequest.Create createCollectionRequest = new CollectionAdminRequest.Create() - .setCollectionName(collectionName) - .setNumShards(2) - .setReplicationFactor(2) - .setConfigName("conf1") - .setRouterField("myOwnField") - .setStateFormat(1); - - CollectionAdminResponse response = createCollectionRequest.process(cloudClient); + CollectionAdminResponse response = CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2) + .setStateFormat(1) + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); @@ -91,57 +68,44 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { assertTrue(status.get("QTime") > 0); } - cloudClient.setDefaultCollection(collectionName); - CollectionAdminRequest.Delete deleteCollectionRequest = new CollectionAdminRequest.Delete() - .setCollectionName(collectionName); - response = deleteCollectionRequest.process(cloudClient); + response = CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); Map> nodesStatus = response.getCollectionNodesStatus(); - assertNull("Deleted collection " + collectionName + "still exists", - cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collectionName)); assertEquals(4, nodesStatus.size()); - + + waitForState("Expected " + collectionName + " to disappear from cluster state", collectionName, (n, c) -> c == null); + // Test Creating a collection with new stateformat. 
collectionName = "solrj_newstateformat"; - createCollectionRequest = new CollectionAdminRequest.Create() - .setCollectionName(collectionName) - .setNumShards(2) - .setConfigName("conf1") - .setStateFormat(2); - response = createCollectionRequest.process(cloudClient); + response = CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2) + .setStateFormat(2) + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); - waitForRecoveriesToFinish(collectionName, false); - assertTrue("Collection state does not exist", - cloudClient.getZkStateReader().getZkClient() - .exists(ZkStateReader.getCollectionPath(collectionName), true)); + waitForState("Expected " + collectionName + " to appear in cluster state", collectionName, (n, c) -> c != null); } - - protected void testCreateAndDeleteShard() throws IOException, SolrServerException { + + @Test + public void testCreateAndDeleteShard() throws IOException, SolrServerException { + // Create an implicit collection String collectionName = "solrj_implicit"; - CollectionAdminResponse response = new CollectionAdminRequest.Create() - .setCollectionName(collectionName) - .setShards("shardA,shardB") - .setConfigName("conf1") - .setRouterName("implicit").process(cloudClient); + CollectionAdminResponse response + = CollectionAdminRequest.createCollectionWithImplicitRouter(collectionName, "conf", "shardA,shardB", 1) + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); Map> coresStatus = response.getCollectionCoresStatus(); assertEquals(2, coresStatus.size()); - cloudClient.setDefaultCollection(collectionName); // Add a shard to the implicit collection - response = new CollectionAdminRequest - .CreateShard() - .setCollectionName(collectionName) - .setShardName("shardC").process(cloudClient); + response = CollectionAdminRequest.createShard(collectionName, "shardC").process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); @@ -149,57 +113,38 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { assertEquals(1, coresStatus.size()); assertEquals(0, (int) coresStatus.get(collectionName + "_shardC_replica1").get("status")); - CollectionAdminRequest.DeleteShard deleteShardRequest = new CollectionAdminRequest - .DeleteShard() - .setCollectionName(collectionName) - .setShardName("shardC"); - response = deleteShardRequest.process(cloudClient); + response = CollectionAdminRequest.deleteShard(collectionName, "shardC").process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); Map> nodesStatus = response.getCollectionNodesStatus(); assertEquals(1, nodesStatus.size()); } - - protected void testReloadCollection() throws IOException, SolrServerException { - cloudClient.setDefaultCollection(DEFAULT_COLLECTION); - CollectionAdminRequest.Reload reloadCollectionRequest = new CollectionAdminRequest.Reload() - .setCollectionName("collection1"); - CollectionAdminResponse response = reloadCollectionRequest.process(cloudClient); + @Test + public void testCreateAndDeleteAlias() throws IOException, SolrServerException { + + final String collection = "aliasedCollection"; + CollectionAdminRequest.createCollection(collection, "conf", 1, 1).process(cluster.getSolrClient()); + + CollectionAdminResponse response + = CollectionAdminRequest.createAlias("solrj_alias", collection).process(cluster.getSolrClient()); + assertEquals(0, response.getStatus()); + + 
response = CollectionAdminRequest.deleteAlias("solrj_alias").process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); } - - protected void testCreateAndDeleteAlias() throws IOException, SolrServerException { - CollectionAdminRequest.CreateAlias createAliasRequest = new CollectionAdminRequest - .CreateAlias() - .setAliasName("solrj_alias") - .setAliasedCollections(DEFAULT_COLLECTION); - CollectionAdminResponse response = createAliasRequest.process(cloudClient); - assertEquals(0, response.getStatus()); + @Test + public void testSplitShard() throws Exception { - CollectionAdminRequest.DeleteAlias deleteAliasRequest = new CollectionAdminRequest.DeleteAlias() - .setAliasName("solrj_alias"); - deleteAliasRequest.process(cloudClient); - - assertEquals(0, response.getStatus()); - } - - protected void testSplitShard() throws Exception { - String collectionName = "solrj_test_splitshard"; - cloudClient.setDefaultCollection(collectionName); - - CollectionAdminRequest.Create createCollectionRequest = new CollectionAdminRequest.Create() - .setConfigName("conf1") - .setNumShards(2) - .setCollectionName(collectionName); - createCollectionRequest.process(cloudClient); - - CollectionAdminRequest.SplitShard splitShardRequest = new CollectionAdminRequest.SplitShard() - .setCollectionName(collectionName) - .setShardName("shard1"); - CollectionAdminResponse response = splitShardRequest.process(cloudClient); + final String collectionName = "solrj_test_splitshard"; + CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1) + .process(cluster.getSolrClient()); + + CollectionAdminResponse response = CollectionAdminRequest.splitShard(collectionName) + .setShardName("shard1") + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); @@ -207,267 +152,204 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { assertEquals(0, (int) coresStatus.get(collectionName + "_shard1_0_replica1").get("status")); assertEquals(0, (int) coresStatus.get(collectionName + "_shard1_1_replica1").get("status")); - waitForRecoveriesToFinish(collectionName, false); - waitForThingsToLevelOut(10); + waitForState("Expected all shards to be active and parent shard to be removed", collectionName, (n, c) -> { + if (c.getSlice("shard1").getState() == Slice.State.ACTIVE) + return false; + for (Replica r : c.getReplicas()) { + if (r.isActive(n) == false) + return false; + } + return true; + }); // Test splitting using split.key - splitShardRequest = new CollectionAdminRequest.SplitShard() - .setCollectionName(collectionName) - .setSplitKey("b!"); - response = splitShardRequest.process(cloudClient); + response = CollectionAdminRequest.splitShard(collectionName) + .setSplitKey("b!") + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); - waitForRecoveriesToFinish(collectionName, false); - waitForThingsToLevelOut(10); - - ClusterState clusterState = cloudClient.getZkStateReader().getClusterState(); - Collection slices = clusterState.getActiveSlices(collectionName); - assertEquals("ClusterState: "+ clusterState.getActiveSlices(collectionName), 5, slices.size()); + waitForState("Expected 5 slices to be active", collectionName, (n, c) -> c.getActiveSlices().size() == 5); } - private void testCreateCollectionWithPropertyParam() throws Exception { + @Test + public void testCreateCollectionWithPropertyParam() throws Exception { + String collectionName = "solrj_test_core_props"; - File tmpDir = 
createTempDir("testPropertyParamsForCreate").toFile(); - File dataDir = new File(tmpDir, "dataDir-" + TestUtil.randomSimpleString(random(), 1, 5)); - File ulogDir = new File(tmpDir, "ulogDir-" + TestUtil.randomSimpleString(random(), 1, 5)); + Path tmpDir = createTempDir("testPropertyParamsForCreate"); + Path dataDir = tmpDir.resolve("dataDir-" + TestUtil.randomSimpleString(random(), 1, 5)); + Path ulogDir = tmpDir.resolve("ulogDir-" + TestUtil.randomSimpleString(random(), 1, 5)); - Properties properties = new Properties(); - properties.put(CoreAdminParams.DATA_DIR, dataDir.getAbsolutePath()); - properties.put(CoreAdminParams.ULOG_DIR, ulogDir.getAbsolutePath()); + CollectionAdminResponse response = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1) + .withProperty(CoreAdminParams.DATA_DIR, dataDir.toString()) + .withProperty(CoreAdminParams.ULOG_DIR, ulogDir.toString()) + .process(cluster.getSolrClient()); - CollectionAdminRequest.Create createReq = new CollectionAdminRequest.Create() - .setCollectionName(collectionName) - .setNumShards(1) - .setConfigName("conf1") - .setProperties(properties); - - CollectionAdminResponse response = createReq.process(cloudClient); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); Map> coresStatus = response.getCollectionCoresStatus(); assertEquals(1, coresStatus.size()); - DocCollection testCollection = cloudClient.getZkStateReader() - .getClusterState().getCollection(collectionName); + DocCollection testCollection = getCollectionState(collectionName); Replica replica1 = testCollection.getReplica("core_node1"); + CoreStatus coreStatus = getCoreStatus(replica1); - try (HttpSolrClient client = getHttpSolrClient(replica1.getStr("base_url"))) { - CoreAdminResponse status = CoreAdminRequest.getStatus(replica1.getStr("core"), client); - NamedList coreStatus = status.getCoreStatus(replica1.getStr("core")); - String dataDirStr = (String) coreStatus.get("dataDir"); - assertEquals("Data dir does not match param given in property.dataDir syntax", - new File(dataDirStr).getAbsolutePath(), dataDir.getAbsolutePath()); - } + assertEquals(Paths.get(coreStatus.getDataDirectory()).toString(), dataDir.toString()); - CollectionAdminRequest.Delete deleteCollectionRequest = new CollectionAdminRequest.Delete(); - deleteCollectionRequest.setCollectionName(collectionName); - deleteCollectionRequest.process(cloudClient); } - private void testAddAndDeleteReplica() throws Exception { - String collectionName = "solrj_replicatests"; - createCollection(collectionName, cloudClient, 1, 2); + @Test + public void testAddAndDeleteReplica() throws Exception { - cloudClient.setDefaultCollection(collectionName); + final String collectionName = "solrj_replicatests"; + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2) + .process(cluster.getSolrClient()); - String newReplicaName = Assign.assignNode(cloudClient.getZkStateReader().getClusterState().getCollection(collectionName)); - ArrayList nodeList = new ArrayList<>(cloudClient.getZkStateReader().getClusterState().getLiveNodes()); + String newReplicaName = Assign.assignNode(getCollectionState(collectionName)); + ArrayList nodeList + = new ArrayList<>(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes()); Collections.shuffle(nodeList, random()); - CollectionAdminRequest.AddReplica addReplica = new CollectionAdminRequest.AddReplica() - .setCollectionName(collectionName) - .setShardName("shard1") - .setNode(nodeList.get(0)); - CollectionAdminResponse response = 
addReplica.process(cloudClient); + final String node = nodeList.get(0); + + CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(collectionName, "shard1") + .setNode(node) + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertTrue(response.isSuccess()); - TimeOut timeout = new TimeOut(3, TimeUnit.SECONDS); - Replica newReplica = null; - - while (! timeout.hasTimedOut() && newReplica == null) { - Slice slice = cloudClient.getZkStateReader().getClusterState().getSlice(collectionName, "shard1"); - newReplica = slice.getReplica(newReplicaName); - } - - assertNotNull(newReplica); - - assertEquals("Replica should be created on the right node", - cloudClient.getZkStateReader().getBaseUrlForNodeName(nodeList.get(0)), - newReplica.getStr(ZkStateReader.BASE_URL_PROP) - ); + waitForState("Expected to see replica " + newReplicaName + " on node " + node, collectionName, (n, c) -> { + Replica r = c.getSlice("shard1").getReplica(newReplicaName); + return r != null && r.getNodeName().equals(node); + }); // Test DELETEREPLICA - CollectionAdminRequest.DeleteReplica deleteReplicaRequest = new CollectionAdminRequest.DeleteReplica() - .setCollectionName(collectionName) - .setShardName("shard1") - .setReplica(newReplicaName); - response = deleteReplicaRequest.process(cloudClient); - + response = CollectionAdminRequest.deleteReplica(collectionName, "shard1", newReplicaName) + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); - timeout = new TimeOut(3, TimeUnit.SECONDS); + waitForState("Expected replica " + newReplicaName + " to vanish from cluster state", collectionName, + (n, c) -> c.getSlice("shard1").getReplica(newReplicaName) == null); - while (! timeout.hasTimedOut() && newReplica != null) { - Slice slice = cloudClient.getZkStateReader().getClusterState().getSlice(collectionName, "shard1"); - newReplica = slice.getReplica(newReplicaName); - } - - assertNull(newReplica); } - private void testClusterProp() throws InterruptedException, IOException, SolrServerException { - CollectionAdminRequest.ClusterProp clusterPropRequest = new CollectionAdminRequest.ClusterProp() - .setPropertyName(ZkStateReader.LEGACY_CLOUD) - .setPropertyValue("false"); - CollectionAdminResponse response = clusterPropRequest.process(cloudClient); + @Test + public void testClusterProp() throws InterruptedException, IOException, SolrServerException { + + CollectionAdminResponse response = CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, "false") + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); - TimeOut timeout = new TimeOut(3, TimeUnit.SECONDS); - boolean changed = false; - - while(! timeout.hasTimedOut()){ - Thread.sleep(10); - changed = Objects.equals("false", - cloudClient.getZkStateReader().getClusterProperty(ZkStateReader.LEGACY_CLOUD, "none")); - if(changed) break; - } - assertTrue("The Cluster property wasn't set", changed); + ClusterProperties props = new ClusterProperties(zkClient()); + assertEquals("Cluster property was not set", props.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "true"), "false"); // Unset ClusterProp that we set. 
- clusterPropRequest = new CollectionAdminRequest.ClusterProp() - .setPropertyName(ZkStateReader.LEGACY_CLOUD) - .setPropertyValue(null); - clusterPropRequest.process(cloudClient); + CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, null).process(cluster.getSolrClient()); + assertEquals("Cluster property was not unset", props.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "true"), "true"); - timeout = new TimeOut(3, TimeUnit.SECONDS); - changed = false; - while(! timeout.hasTimedOut()) { - Thread.sleep(10); - changed = (cloudClient.getZkStateReader().getClusterProperty(ZkStateReader.LEGACY_CLOUD, (String) null) == null); - if(changed) - break; - } - assertTrue("The Cluster property wasn't unset", changed); } - private void testAddAndRemoveRole() throws InterruptedException, IOException, SolrServerException { - cloudClient.setDefaultCollection(DEFAULT_COLLECTION); - Replica replica = cloudClient.getZkStateReader().getLeaderRetry(DEFAULT_COLLECTION, SHARD1); - CollectionAdminRequest.AddRole addRoleRequest = new CollectionAdminRequest.AddRole() - .setNode(replica.getNodeName()) - .setRole("overseer"); - addRoleRequest.process(cloudClient); + @Test + @SuppressWarnings("unchecked") + public void testAddAndRemoveRole() throws InterruptedException, IOException, SolrServerException { - CollectionAdminRequest.ClusterStatus clusterStatusRequest = new CollectionAdminRequest.ClusterStatus() - .setCollectionName(DEFAULT_COLLECTION); - CollectionAdminResponse response = clusterStatusRequest.process(cloudClient); + String node = cluster.getRandomJetty(random()).getNodeName(); + + CollectionAdminRequest.addRole(node, "overseer").process(cluster.getSolrClient()); + + CollectionAdminResponse response = CollectionAdminRequest.getClusterStatus().process(cluster.getSolrClient()); NamedList rsp = response.getResponse(); - NamedList cluster = (NamedList) rsp.get("cluster"); - assertNotNull("Cluster state should not be null", cluster); - Map roles = (Map) cluster.get("roles"); + NamedList cs = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cs); + Map roles = (Map) cs.get("roles"); assertNotNull("Role information should not be null", roles); List overseer = (List) roles.get("overseer"); assertNotNull(overseer); assertEquals(1, overseer.size()); - assertTrue(overseer.contains(replica.getNodeName())); + assertTrue(overseer.contains(node)); // Remove role - new CollectionAdminRequest.RemoveRole() - .setNode(replica.getNodeName()) - .setRole("overseer") - .process(cloudClient); - - clusterStatusRequest = new CollectionAdminRequest.ClusterStatus(); - clusterStatusRequest.setCollectionName(DEFAULT_COLLECTION); - response = clusterStatusRequest.process(cloudClient); + CollectionAdminRequest.removeRole(node, "overseer").process(cluster.getSolrClient()); + response = CollectionAdminRequest.getClusterStatus().process(cluster.getSolrClient()); rsp = response.getResponse(); - cluster = (NamedList) rsp.get("cluster"); - assertNotNull("Cluster state should not be null", cluster); - roles = (Map) cluster.get("roles"); + cs = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cs); + roles = (Map) cs.get("roles"); assertNotNull("Role information should not be null", roles); overseer = (List) roles.get("overseer"); - assertFalse(overseer.contains(replica.getNodeName())); + assertFalse(overseer.contains(node)); } - - private void testOverseerStatus() throws IOException, SolrServerException { - CollectionAdminResponse response = new 
CollectionAdminRequest.OverseerStatus().process(cloudClient); + + @Test + public void testOverseerStatus() throws IOException, SolrServerException { + CollectionAdminResponse response = new CollectionAdminRequest.OverseerStatus().process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertNotNull("overseer_operations shouldn't be null", response.getResponse().get("overseer_operations")); } - - private void testList() throws IOException, SolrServerException { - CollectionAdminResponse response = new CollectionAdminRequest.List().process(cloudClient); + + @Test + public void testList() throws IOException, SolrServerException { + CollectionAdminResponse response = new CollectionAdminRequest.List().process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); assertNotNull("collection list should not be null", response.getResponse().get("collections")); } - - private void testAddAndDeleteReplicaProp() throws InterruptedException, IOException, SolrServerException { - Replica replica = cloudClient.getZkStateReader().getLeaderRetry(DEFAULT_COLLECTION, SHARD1); - CollectionAdminResponse response = new CollectionAdminRequest.AddReplicaProp() - .setCollectionName(DEFAULT_COLLECTION) - .setShardName(SHARD1) - .setReplica(replica.getName()) - .setPropertyName("preferredleader") - .setPropertyValue("true").process(cloudClient); + + @Test + public void testAddAndDeleteReplicaProp() throws InterruptedException, IOException, SolrServerException { + + final String collection = "replicaProperties"; + CollectionAdminRequest.createCollection(collection, "conf", 2, 2) + .process(cluster.getSolrClient()); + + final Replica replica = getCollectionState(collection).getLeader("shard1"); + CollectionAdminResponse response + = CollectionAdminRequest.addReplicaProperty(collection, "shard1", replica.getName(), "preferredleader", "true") + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); - TimeOut timeout = new TimeOut(20, TimeUnit.SECONDS); - String propertyValue = null; - - String replicaName = replica.getName(); - while (! timeout.hasTimedOut()) { - ClusterState clusterState = cloudClient.getZkStateReader().getClusterState(); - replica = clusterState.getReplica(DEFAULT_COLLECTION, replicaName); - propertyValue = replica.getStr("property.preferredleader"); - if(StringUtils.equals("true", propertyValue)) - break; - Thread.sleep(50); - } - - assertEquals("Replica property was not updated, Latest value: " + - cloudClient.getZkStateReader().getClusterState().getReplica(DEFAULT_COLLECTION, replicaName), - "true", - propertyValue); + waitForState("Expecting property 'preferredleader' to appear on replica " + replica.getName(), collection, + (n, c) -> "true".equals(c.getReplica(replica.getName()).getStr("property.preferredleader"))); - response = new CollectionAdminRequest.DeleteReplicaProp() - .setCollectionName(DEFAULT_COLLECTION) - .setShardName(SHARD1) - .setReplica(replicaName) - .setPropertyName("property.preferredleader").process(cloudClient); + response = CollectionAdminRequest.deleteReplicaProperty(collection, "shard1", replica.getName(), "property.preferredleader") + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); - timeout = new TimeOut(20, TimeUnit.SECONDS); - boolean updated = false; - - while (! 
timeout.hasTimedOut()) { - ClusterState clusterState = cloudClient.getZkStateReader().getClusterState(); - replica = clusterState.getReplica(DEFAULT_COLLECTION, replicaName); - updated = replica.getStr("property.preferredleader") == null; - if(updated) - break; - Thread.sleep(50); - } - - assertTrue("Replica property was not removed", updated); + waitForState("Expecting property 'preferredleader' to be removed from replica " + replica.getName(), collection, + (n, c) -> c.getReplica(replica.getName()).getStr("property.preferredleader") == null); } - - private void testBalanceShardUnique() throws IOException, + + @Test + public void testBalanceShardUnique() throws IOException, SolrServerException, KeeperException, InterruptedException { - CollectionAdminResponse response = new CollectionAdminRequest.BalanceShardUnique() - .setCollection(DEFAULT_COLLECTION) - .setPropertyName("preferredLeader").process(cloudClient); + + final String collection = "balancedProperties"; + CollectionAdminRequest.createCollection(collection, "conf", 2, 2) + .process(cluster.getSolrClient()); + + CollectionAdminResponse response = CollectionAdminRequest.balanceReplicaProperty(collection, "preferredLeader") + .process(cluster.getSolrClient()); assertEquals(0, response.getStatus()); - verifyUniqueAcrossCollection(cloudClient, DEFAULT_COLLECTION, "property.preferredleader"); + waitForState("Expecting 'preferredleader' property to be balanced across all shards", collection, (n, c) -> { + for (Slice slice : c) { + int count = 0; + for (Replica replica : slice) { + if ("true".equals(replica.getStr("property.preferredleader"))) + count += 1; + } + if (count != 1) + return false; + } + return true; + }); + } } diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteLastCustomShardedReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteLastCustomShardedReplicaTest.java index dcc99a40a09..c46362e84fb 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DeleteLastCustomShardedReplicaTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DeleteLastCustomShardedReplicaTest.java @@ -16,106 +16,40 @@ */ package org.apache.solr.cloud; -import org.apache.solr.client.solrj.SolrRequest; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudSolrClient; -import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.ImplicitDocRouter; import org.apache.solr.common.cloud.Replica; -import org.apache.solr.common.cloud.Slice; -import org.apache.solr.common.cloud.ZkStateReader; -import org.apache.solr.common.params.MapSolrParams; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.Utils; -import org.apache.solr.util.TimeOut; +import org.junit.BeforeClass; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; +public class DeleteLastCustomShardedReplicaTest extends SolrCloudTestCase { -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SHARDS_PROP; -import static org.apache.solr.common.util.Utils.makeMap; -import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA; - -public class DeleteLastCustomShardedReplicaTest extends AbstractFullDistribZkTestBase { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - protected String getSolrXml() { - return "solr.xml"; - } - - public DeleteLastCustomShardedReplicaTest() { - sliceCount = 2; + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", configset("cloud-minimal")) + .configure(); } @Test - @ShardsFixed(num = 2) public void test() throws Exception { - try (CloudSolrClient client = createCloudClient(null)) { - int replicationFactor = 1; - int maxShardsPerNode = 5; - Map props = Utils.makeMap( - "router.name", ImplicitDocRouter.NAME, - ZkStateReader.REPLICATION_FACTOR, replicationFactor, - ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode, - NUM_SLICES, 1, - SHARDS_PROP, "a,b"); + final String collectionName = "customcollreplicadeletion"; - Map> collectionInfos = new HashMap<>(); + CollectionAdminRequest.createCollectionWithImplicitRouter(collectionName, "conf", "a,b", 1) + .setMaxShardsPerNode(5) + .process(cluster.getSolrClient()); - String collectionName = "customcollreplicadeletion"; + DocCollection collectionState = getCollectionState(collectionName); + Replica replica = getRandomReplica(collectionState.getSlice("a")); - createCollection(collectionInfos, collectionName, props, client); + CollectionAdminRequest.deleteReplica(collectionName, "a", replica.getName()) + .process(cluster.getSolrClient()); - waitForRecoveriesToFinish(collectionName, false); + waitForState("Expected shard 'a' to have no replicas", collectionName, (n, c) -> { + return c.getSlice("a") == null || c.getSlice("a").getReplicas().size() == 0; + }); - DocCollection testcoll = getCommonCloudSolrClient().getZkStateReader() - .getClusterState().getCollection(collectionName); - Replica replica = testcoll.getSlice("a").getReplicas().iterator().next(); - - removeAndWaitForReplicaGone(client, collectionName, replica, "a", replicationFactor-1); - } - } - - protected void removeAndWaitForReplicaGone(CloudSolrClient client, String COLL_NAME, Replica replica, String shard, - final int expectedNumReplicasRemaining) - throws SolrServerException, IOException, InterruptedException { - Map m = makeMap("collection", COLL_NAME, "action", DELETEREPLICA.toLower(), "shard", - shard, "replica", replica.getName()); - SolrParams params = new MapSolrParams(m); - SolrRequest request = new QueryRequest(params); - request.setPath("/admin/collections"); - client.request(request); - TimeOut timeout = new TimeOut(3, TimeUnit.SECONDS); - boolean success = false; - DocCollection testcoll = null; - while (! timeout.hasTimedOut()) { - testcoll = getCommonCloudSolrClient().getZkStateReader() - .getClusterState().getCollection(COLL_NAME); - // As of SOLR-5209 the last replica deletion no longer leads to - // the deletion of the slice. - final Slice slice = testcoll.getSlice(shard); - final int actualNumReplicasRemaining = (slice == null ? 
0 : slice.getReplicas().size()); - success = (actualNumReplicasRemaining == expectedNumReplicasRemaining); - if (success) { - log.info("replica cleaned up {}/{} core {}", - shard + "/" + replica.getName(), replica.getStr("core")); - log.info("current state {}", testcoll); - break; - } - Thread.sleep(100); - } - assertTrue("Replica not cleaned up", success); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java index 90973636c2f..ed3d03b3894 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java @@ -20,202 +20,123 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; -import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.request.CoreAdminRequest; -import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.client.solrj.response.CollectionAdminResponse; -import org.apache.solr.client.solrj.response.CoreAdminResponse; +import org.apache.solr.client.solrj.request.CoreStatus; import org.apache.solr.cloud.overseer.OverseerAction; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice.State; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; -import org.apache.solr.common.params.CollectionParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.Utils; import org.apache.solr.util.FileUtils; import org.apache.zookeeper.KeeperException; +import org.junit.BeforeClass; import org.junit.Test; -public class DeleteShardTest extends AbstractFullDistribZkTestBase { - - public DeleteShardTest() { - super(); - sliceCount = 2; - } +public class DeleteShardTest extends SolrCloudTestCase { // TODO: Custom hash slice deletion test + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", configset("cloud-minimal")) + .configure(); + } + @Test - @ShardsFixed(num = 2) public void test() throws Exception { - ClusterState clusterState = cloudClient.getZkStateReader().getClusterState(); - Slice slice1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1); - Slice slice2 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD2); + final String collection = "deleteShard"; - assertNotNull("Shard1 not found", slice1); - assertNotNull("Shard2 not found", slice2); - assertSame("Shard1 is not active", Slice.State.ACTIVE, slice1.getState()); - assertSame("Shard2 is not active", Slice.State.ACTIVE, slice2.getState()); + CollectionAdminRequest.createCollection(collection, "conf", 2, 1) + .process(cluster.getSolrClient()); - try { - deleteShard(SHARD1); - fail("Deleting an active shard should not have succeeded"); - } catch (HttpSolrClient.RemoteSolrException e) { - // expected - } + DocCollection state = getCollectionState(collection); + assertEquals(State.ACTIVE, 
state.getSlice("shard1").getState()); + assertEquals(State.ACTIVE, state.getSlice("shard2").getState()); - setSliceState(SHARD1, Slice.State.INACTIVE); + // Can't delete an ACTIVE shard + expectThrows(Exception.class, () -> { + CollectionAdminRequest.deleteShard(collection, "shard1").process(cluster.getSolrClient()); + }); - clusterState = cloudClient.getZkStateReader().getClusterState(); + setSliceState(collection, "shard1", Slice.State.INACTIVE); - slice1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1); + // Can delete an INATIVE shard + CollectionAdminRequest.deleteShard(collection, "shard1").process(cluster.getSolrClient()); + waitForState("Expected 'shard1' to be removed", collection, (n, c) -> { + return c.getSlice("shard1") == null; + }); - assertSame("Shard1 is not inactive yet.", Slice.State.INACTIVE, slice1.getState()); + // Can delete a shard under construction + setSliceState(collection, "shard2", Slice.State.CONSTRUCTION); + CollectionAdminRequest.deleteShard(collection, "shard2").process(cluster.getSolrClient()); + waitForState("Expected 'shard2' to be removed", collection, (n, c) -> { + return c.getSlice("shard2") == null; + }); - deleteShard(SHARD1); - - confirmShardDeletion(SHARD1); - - setSliceState(SHARD2, Slice.State.CONSTRUCTION); - deleteShard(SHARD2); - confirmShardDeletion(SHARD2); } - protected void confirmShardDeletion(String shard) throws SolrServerException, KeeperException, - InterruptedException { - ZkStateReader zkStateReader = cloudClient.getZkStateReader(); - ClusterState clusterState = zkStateReader.getClusterState(); - int counter = 10; - while (counter-- > 0) { - clusterState = zkStateReader.getClusterState(); - if (clusterState.getSlice("collection1", shard) == null) { - break; - } - Thread.sleep(1000); - } - - assertNull("Cluster still contains shard1 even after waiting for it to be deleted.", - clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1)); - } - - protected void deleteShard(String shard) throws SolrServerException, IOException, + protected void setSliceState(String collection, String slice, State state) throws SolrServerException, IOException, KeeperException, InterruptedException { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("action", CollectionParams.CollectionAction.DELETESHARD.toString()); - params.set("collection", AbstractFullDistribZkTestBase.DEFAULT_COLLECTION); - params.set("shard", shard); - SolrRequest request = new QueryRequest(params); - request.setPath("/admin/collections"); + CloudSolrClient client = cluster.getSolrClient(); - String baseUrl = ((HttpSolrClient) shardToJetty.get(SHARD1).get(0).client.solrClient) - .getBaseURL(); - baseUrl = baseUrl.substring(0, baseUrl.length() - "collection1".length()); - - try (HttpSolrClient baseServer = getHttpSolrClient(baseUrl)) { - baseServer.setConnectionTimeout(15000); - baseServer.setSoTimeout(60000); - baseServer.request(request); - } - } - - protected void setSliceState(String slice, State state) throws SolrServerException, IOException, - KeeperException, InterruptedException { - DistributedQueue inQueue = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient()); + // TODO can this be encapsulated better somewhere? 
+ DistributedQueue inQueue = Overseer.getStateUpdateQueue(client.getZkStateReader().getZkClient()); Map propMap = new HashMap<>(); propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()); propMap.put(slice, state.toString()); - propMap.put(ZkStateReader.COLLECTION_PROP, "collection1"); + propMap.put(ZkStateReader.COLLECTION_PROP, collection); ZkNodeProps m = new ZkNodeProps(propMap); - ZkStateReader zkStateReader = cloudClient.getZkStateReader(); inQueue.offer(Utils.toJSON(m)); - boolean transition = false; - for (int counter = 10; counter > 0; counter--) { - ClusterState clusterState = zkStateReader.getClusterState(); - State sliceState = clusterState.getSlice("collection1", slice).getState(); - if (sliceState == state) { - transition = true; - break; - } - Thread.sleep(1000); - } + waitForState("Expected shard " + slice + " to be in state " + state.toString(), collection, (n, c) -> { + return c.getSlice(slice).getState() == state; + }); - if (!transition) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not set shard [" + slice + "] as " + state); - } } @Test public void testDirectoryCleanupAfterDeleteShard() throws InterruptedException, IOException, SolrServerException { - CollectionAdminResponse rsp = new CollectionAdminRequest.Create() - .setCollectionName("deleteshard_test") - .setRouterName("implicit") - .setShards("a,b,c") - .setReplicationFactor(1) - .setConfigName("conf1") - .process(cloudClient); + + final String collection = "deleteshard_test"; + CollectionAdminRequest.createCollectionWithImplicitRouter(collection, "conf", "a,b,c", 1) + .setMaxShardsPerNode(2) + .process(cluster.getSolrClient()); // Get replica details - Replica leader = cloudClient.getZkStateReader().getLeaderRetry("deleteshard_test", "a"); - String baseUrl = (String) leader.get("base_url"); - String core = (String) leader.get("core"); + Replica leader = getCollectionState(collection).getLeader("a"); - String instanceDir; - String dataDir; + CoreStatus coreStatus = getCoreStatus(leader); + assertTrue("Instance directory doesn't exist", FileUtils.fileExists(coreStatus.getInstanceDirectory())); + assertTrue("Data directory doesn't exist", FileUtils.fileExists(coreStatus.getDataDirectory())); - try (HttpSolrClient client = getHttpSolrClient(baseUrl)) { - CoreAdminResponse statusResp = CoreAdminRequest.getStatus(core, client); - NamedList r = statusResp.getCoreStatus().get(core); - instanceDir = (String) r.findRecursive("instanceDir"); - dataDir = (String) r.get("dataDir"); - } - - assertTrue("Instance directory doesn't exist", FileUtils.fileExists(instanceDir)); - assertTrue("Data directory doesn't exist", FileUtils.fileExists(dataDir)); - - assertEquals(3, cloudClient.getZkStateReader().getClusterState().getActiveSlices("deleteshard_test").size()); + assertEquals(3, getCollectionState(collection).getActiveSlices().size()); // Delete shard 'a' - new CollectionAdminRequest.DeleteShard() - .setCollectionName("deleteshard_test") - .setShardName("a") - .process(cloudClient); + CollectionAdminRequest.deleteShard(collection, "a").process(cluster.getSolrClient()); - assertEquals(2, cloudClient.getZkStateReader().getClusterState().getActiveSlices("deleteshard_test").size()); - assertFalse("Instance directory still exists", FileUtils.fileExists(instanceDir)); - assertFalse("Data directory still exists", FileUtils.fileExists(dataDir)); + assertEquals(2, getCollectionState(collection).getActiveSlices().size()); + assertFalse("Instance directory still exists", 
FileUtils.fileExists(coreStatus.getInstanceDirectory())); + assertFalse("Data directory still exists", FileUtils.fileExists(coreStatus.getDataDirectory())); - leader = cloudClient.getZkStateReader().getLeaderRetry("deleteshard_test", "b"); - baseUrl = (String) leader.get("base_url"); - core = (String) leader.get("core"); - - try (HttpSolrClient client = getHttpSolrClient(baseUrl)) { - CoreAdminResponse statusResp = CoreAdminRequest.getStatus(core, client); - NamedList r = statusResp.getCoreStatus().get(core); - instanceDir = (String) r.findRecursive("instanceDir"); - dataDir = (String) r.get("dataDir"); - } + leader = getCollectionState(collection).getLeader("b"); + coreStatus = getCoreStatus(leader); // Delete shard 'b' - new CollectionAdminRequest.DeleteShard() - .setCollectionName("deleteshard_test") - .setShardName("b") + CollectionAdminRequest.deleteShard(collection, "b") .setDeleteDataDir(false) .setDeleteInstanceDir(false) - .process(cloudClient); + .process(cluster.getSolrClient()); - assertEquals(1, cloudClient.getZkStateReader().getClusterState().getActiveSlices("deleteshard_test").size()); - assertTrue("Instance directory still exists", FileUtils.fileExists(instanceDir)); - assertTrue("Data directory still exists", FileUtils.fileExists(dataDir)); + assertEquals(1, getCollectionState(collection).getActiveSlices().size()); + assertTrue("Instance directory still exists", FileUtils.fileExists(coreStatus.getInstanceDirectory())); + assertTrue("Data directory still exists", FileUtils.fileExists(coreStatus.getDataDirectory())); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerModifyCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerModifyCollectionTest.java index e902ab4cbd1..c9a90a56b60 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerModifyCollectionTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerModifyCollectionTest.java @@ -17,78 +17,62 @@ package org.apache.solr.cloud; -import static org.apache.solr.client.solrj.SolrRequest.METHOD.POST; -import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH; - -import java.lang.invoke.MethodHandles; import java.util.Map; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.impl.HttpSolrClient; -import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException; import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.request.ConfigSetAdminRequest; import org.apache.solr.client.solrj.request.GenericSolrRequest; -import org.apache.solr.client.solrj.response.CollectionAdminResponse; -import org.apache.solr.client.solrj.response.ConfigSetAdminResponse; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.Utils; import org.apache.zookeeper.KeeperException; +import org.junit.BeforeClass; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public class OverseerModifyCollectionTest extends AbstractFullDistribZkTestBase { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - +import static org.apache.solr.client.solrj.SolrRequest.METHOD.POST; +import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH; + +public class OverseerModifyCollectionTest extends SolrCloudTestCase { + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + 
.addConfig("conf1", configset("cloud-minimal")) + .addConfig("conf2", configset("cloud-minimal")) + .configure(); + } + @Test public void testModifyColl() throws Exception { - String collName = "modifyColl"; - String newConfName = "conf" + random().nextInt(); - String oldConfName = "conf1"; - try (SolrClient client = createNewSolrClient("", getBaseUrl((HttpSolrClient) clients.get(0)))) { - CollectionAdminResponse rsp; - CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collName, oldConfName, 1, 2); - rsp = create.process(client); - assertEquals(0, rsp.getStatus()); - assertTrue(rsp.isSuccess()); - - ConfigSetAdminRequest.Create createConfig = new ConfigSetAdminRequest.Create() - .setBaseConfigSetName(oldConfName) - .setConfigSetName(newConfName); - - ConfigSetAdminResponse configRsp = createConfig.process(client); - - assertEquals(0, configRsp.getStatus()); - - ModifiableSolrParams p = new ModifiableSolrParams(); - p.add("collection", collName); - p.add("action", "MODIFYCOLLECTION"); - p.add("collection.configName", newConfName); - client.request(new GenericSolrRequest(POST, COLLECTIONS_HANDLER_PATH, p)); - } - - assertEquals(newConfName, getConfigNameFromZk(collName)); + + final String collName = "modifyColl"; + + CollectionAdminRequest.createCollection(collName, "conf1", 1, 2) + .process(cluster.getSolrClient()); + + // TODO create a modifyCollection() method on CollectionAdminRequest + ModifiableSolrParams p1 = new ModifiableSolrParams(); + p1.add("collection", collName); + p1.add("action", "MODIFYCOLLECTION"); + p1.add("collection.configName", "conf2"); + cluster.getSolrClient().request(new GenericSolrRequest(POST, COLLECTIONS_HANDLER_PATH, p1)); + + assertEquals("conf2", getConfigNameFromZk(collName)); //Try an invalid config name - try (SolrClient client = createNewSolrClient("", getBaseUrl((HttpSolrClient) clients.get(0)))) { - ModifiableSolrParams p = new ModifiableSolrParams(); - p.add("collection", collName); - p.add("action", "MODIFYCOLLECTION"); - p.add("collection.configName", "notARealConfigName"); - try{ - client.request(new GenericSolrRequest(POST, COLLECTIONS_HANDLER_PATH, p)); - fail("Exception should be thrown"); - } catch(RemoteSolrException e) { - assertTrue(e.getMessage(), e.getMessage().contains("Can not find the specified config set")); - } - } + ModifiableSolrParams p2 = new ModifiableSolrParams(); + p2.add("collection", collName); + p2.add("action", "MODIFYCOLLECTION"); + p2.add("collection.configName", "notARealConfigName"); + Exception e = expectThrows(Exception.class, () -> { + cluster.getSolrClient().request(new GenericSolrRequest(POST, COLLECTIONS_HANDLER_PATH, p2)); + }); + + assertTrue(e.getMessage(), e.getMessage().contains("Can not find the specified config set")); } private String getConfigNameFromZk(String collName) throws KeeperException, InterruptedException { - byte[] b = cloudClient.getZkStateReader().getZkClient().getData(ZkStateReader.getCollectionPathRoot(collName), null, null, false); + byte[] b = zkClient().getData(ZkStateReader.getCollectionPathRoot(collName), null, null, false); Map confData = (Map) Utils.fromJSON(b); return (String) confData.get(ZkController.CONFIGNAME_PROP); } diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java index dec54d97615..762bbeb0f27 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java @@ -16,118 +16,95 @@ 
*/ package org.apache.solr.cloud; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.SolrTestCaseJ4.SuppressSSL; -import org.apache.solr.client.solrj.SolrRequest; +import java.lang.invoke.MethodHandles; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + import org.apache.solr.client.solrj.embedded.JettySolrRunner; -import org.apache.solr.client.solrj.impl.CloudSolrClient; -import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkNodeProps; -import org.apache.solr.common.params.CollectionParams.CollectionAction; -import org.apache.solr.common.params.MapSolrParams; -import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.Utils; import org.apache.solr.util.TimeOut; import org.apache.zookeeper.data.Stat; +import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.concurrent.TimeUnit; - import static org.apache.solr.cloud.OverseerCollectionConfigSetProcessor.getLeaderNode; import static org.apache.solr.cloud.OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES; -import static org.apache.solr.common.util.Utils.makeMap; -import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE; -import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR; +import static org.hamcrest.CoreMatchers.not; -@LuceneTestCase.Slow -@SuppressSSL(bugUrl = "SOLR-5776") -public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ +public class OverseerRolesTest extends SolrCloudTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - protected String getSolrXml() { - return "solr.xml"; + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(4) + .addConfig("conf", configset("cloud-minimal")) + .configure(); } - public OverseerRolesTest() { - sliceCount = 2; - fixShardCount(TEST_NIGHTLY ? 
6 : 2); + @Before + public void clearAllOverseerRoles() throws Exception { + for (String node : OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient())) { + CollectionAdminRequest.removeRole(node, "overseer").process(cluster.getSolrClient()); + } } @Test - public void test() throws Exception { - try (CloudSolrClient client = createCloudClient(null)) { - testQuitCommand(client); - testOverseerRole(client); - } - } + public void testQuitCommand() throws Exception { - private void testQuitCommand(CloudSolrClient client) throws Exception{ - String collectionName = "testOverseerQuit"; - - createCollection(collectionName, client); - - waitForRecoveriesToFinish(collectionName, false); - - SolrZkClient zk = client.getZkStateReader().getZkClient(); - byte[] data = new byte[0]; - data = zk.getData("/overseer_elect/leader", null, new Stat(), true); + SolrZkClient zk = zkClient(); + byte[] data = zk.getData("/overseer_elect/leader", null, new Stat(), true); Map m = (Map) Utils.fromJSON(data); String s = (String) m.get("id"); String leader = LeaderElector.getNodeName(s); - Overseer.getStateUpdateQueue(zk).offer(Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower()))); + log.info("Current overseer: {}", leader); + Overseer.getStateUpdateQueue(zk) + .offer(Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(), + "id", s))); final TimeOut timeout = new TimeOut(10, TimeUnit.SECONDS); - String newLeader=null; + String newLeader = null; for(;! timeout.hasTimedOut();){ newLeader = OverseerCollectionConfigSetProcessor.getLeaderNode(zk); - if(newLeader!=null && !newLeader.equals(leader)) break; + if (newLeader != null && !newLeader.equals(leader)) + break; Thread.sleep(100); } - assertNotSame( "Leader not changed yet",newLeader,leader); + assertThat("Leader not changed yet", newLeader, not(leader)); - - - assertTrue("The old leader should have rejoined election ", OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zk).contains(leader)); + assertTrue("The old leader should have rejoined election", + OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zk).contains(leader)); } + @Test + public void testOverseerRole() throws Exception { - - - private void testOverseerRole(CloudSolrClient client) throws Exception { - String collectionName = "testOverseerCol"; - - createCollection(collectionName, client); - - waitForRecoveriesToFinish(collectionName, false); - List l = OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) ; + List l = OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient()) ; log.info("All nodes {}", l); - String currentLeader = OverseerCollectionConfigSetProcessor.getLeaderNode(client.getZkStateReader().getZkClient()); + String currentLeader = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient()); log.info("Current leader {} ", currentLeader); l.remove(currentLeader); Collections.shuffle(l, random()); String overseerDesignate = l.get(0); - log.info("overseerDesignate {}",overseerDesignate); - setOverseerRole(client, CollectionAction.ADDROLE,overseerDesignate); + log.info("overseerDesignate {}", overseerDesignate); + + CollectionAdminRequest.addRole(overseerDesignate, "overseer").process(cluster.getSolrClient()); TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS); boolean leaderchanged = false; - for(;!timeout.hasTimedOut();){ - 
if(overseerDesignate.equals(OverseerCollectionConfigSetProcessor.getLeaderNode(client.getZkStateReader().getZkClient()))){ + for (;!timeout.hasTimedOut();) { + if (overseerDesignate.equals(OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient()))) { log.info("overseer designate is the new overseer"); leaderchanged =true; break; @@ -136,36 +113,29 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ } assertTrue("could not set the new overseer . expected "+ overseerDesignate + " current order : " + - getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) + - " ldr :"+ OverseerCollectionConfigSetProcessor.getLeaderNode(client.getZkStateReader().getZkClient()) ,leaderchanged); - - + getSortedOverseerNodeNames(zkClient()) + + " ldr :"+ OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient()) ,leaderchanged); //add another node as overseer - - l.remove(overseerDesignate); - Collections.shuffle(l, random()); String anotherOverseer = l.get(0); log.info("Adding another overseer designate {}", anotherOverseer); - setOverseerRole(client, CollectionAction.ADDROLE, anotherOverseer); + CollectionAdminRequest.addRole(anotherOverseer, "overseer").process(cluster.getSolrClient()); - String currentOverseer = getLeaderNode(client.getZkStateReader().getZkClient()); + String currentOverseer = getLeaderNode(zkClient()); log.info("Current Overseer {}", currentOverseer); - String hostPort = currentOverseer.substring(0,currentOverseer.indexOf('_')); + String hostPort = currentOverseer.substring(0, currentOverseer.indexOf('_')); StringBuilder sb = new StringBuilder(); -// -// log.info("hostPort : {}", hostPort); JettySolrRunner leaderJetty = null; - for (JettySolrRunner jetty : jettys) { + for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { String s = jetty.getBaseUrl().toString(); log.info("jetTy {}",s); sb.append(s).append(" , "); @@ -178,49 +148,20 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ assertNotNull("Could not find a jetty2 kill", leaderJetty); log.info("leader node {}", leaderJetty.getBaseUrl()); - log.info ("current election Queue", - OverseerCollectionConfigSetProcessor.getSortedElectionNodes(client.getZkStateReader().getZkClient(), - "/overseer_elect/election")); + log.info("current election Queue", + OverseerCollectionConfigSetProcessor.getSortedElectionNodes(zkClient(), "/overseer_elect/election")); ChaosMonkey.stop(leaderJetty); timeout = new TimeOut(10, TimeUnit.SECONDS); leaderchanged = false; for (; !timeout.hasTimedOut(); ) { - currentOverseer = getLeaderNode(client.getZkStateReader().getZkClient()); + currentOverseer = getLeaderNode(zkClient()); if (anotherOverseer.equals(currentOverseer)) { leaderchanged = true; break; } Thread.sleep(100); } - assertTrue("New overseer designate has not become the overseer, expected : " + anotherOverseer + "actual : " + getLeaderNode(client.getZkStateReader().getZkClient()), leaderchanged); + assertTrue("New overseer designate has not become the overseer, expected : " + anotherOverseer + "actual : " + getLeaderNode(zkClient()), leaderchanged); } - private void setOverseerRole(CloudSolrClient client, CollectionAction action, String overseerDesignate) throws Exception, IOException { - log.info("Adding overseer designate {} ", overseerDesignate); - Map m = makeMap( - "action", action.toString().toLowerCase(Locale.ROOT), - "role", "overseer", - "node", overseerDesignate); - SolrParams params = new MapSolrParams(m); - SolrRequest request = new QueryRequest(params); - 
request.setPath("/admin/collections"); - client.request(request); - } - - - protected void createCollection(String COLL_NAME, CloudSolrClient client) throws Exception { - int replicationFactor = 2; - int numShards = 4; - int maxShardsPerNode = ((((numShards+1) * replicationFactor) / getCommonCloudSolrClient() - .getZkStateReader().getClusterState().getLiveNodes().size())) + 1; - - Map props = makeMap( - REPLICATION_FACTOR, replicationFactor, - MAX_SHARDS_PER_NODE, maxShardsPerNode, - NUM_SLICES, numShards); - Map> collectionInfos = new HashMap<>(); - createCollection(collectionInfos, COLL_NAME, props, client); - } - - } diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerStatusTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerStatusTest.java index b1899da9b49..80fd38e1dc1 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerStatusTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerStatusTest.java @@ -17,74 +17,56 @@ package org.apache.solr.cloud; import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.junit.BeforeClass; import org.junit.Test; -public class OverseerStatusTest extends BasicDistributedZkTest { +public class OverseerStatusTest extends SolrCloudTestCase { - public OverseerStatusTest() { - schemaString = "schema15.xml"; // we need a string id - sliceCount = 1; + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", configset("cloud-minimal")) + .configure();; } @Test - @ShardsFixed(num = 1) public void test() throws Exception { - waitForThingsToLevelOut(15); - // find existing command counts because collection may be created by base test class too int numCollectionCreates = 0, numOverseerCreates = 0; - NamedList resp = new CollectionAdminRequest.OverseerStatus().process(cloudClient).getResponse(); - if (resp != null) { - NamedList collection_operations = (NamedList) resp.get("collection_operations"); - if (collection_operations != null) { - SimpleOrderedMap createcollection = (SimpleOrderedMap) collection_operations.get(CollectionParams.CollectionAction.CREATE.toLower()); - if (createcollection != null && createcollection.get("requests") != null) { - numCollectionCreates = (Integer) createcollection.get("requests"); - } - NamedList overseer_operations = (NamedList) resp.get("overseer_operations"); - if (overseer_operations != null) { - createcollection = (SimpleOrderedMap) overseer_operations.get(CollectionParams.CollectionAction.CREATE.toLower()); - if (createcollection != null && createcollection.get("requests") != null) { - numOverseerCreates = (Integer) createcollection.get("requests"); - } - } - } - } String collectionName = "overseer_status_test"; - CollectionAdminResponse response = createCollection(collectionName, 1, 1, 1); - resp = new CollectionAdminRequest.OverseerStatus().process(cloudClient).getResponse(); + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1).process(cluster.getSolrClient()); + + NamedList resp = new CollectionAdminRequest.OverseerStatus().process(cluster.getSolrClient()).getResponse(); NamedList collection_operations = (NamedList) resp.get("collection_operations"); NamedList overseer_operations = (NamedList) resp.get("overseer_operations"); - SimpleOrderedMap createcollection = 
(SimpleOrderedMap) collection_operations.get(CollectionParams.CollectionAction.CREATE.toLower()); + SimpleOrderedMap createcollection + = (SimpleOrderedMap) collection_operations.get(CollectionParams.CollectionAction.CREATE.toLower()); assertEquals("No stats for create in OverseerCollectionProcessor", numCollectionCreates + 1, createcollection.get("requests")); createcollection = (SimpleOrderedMap) overseer_operations.get(CollectionParams.CollectionAction.CREATE.toLower()); assertEquals("No stats for create in Overseer", numOverseerCreates + 1, createcollection.get("requests")); // Reload the collection - new CollectionAdminRequest.Reload().setCollectionName(collectionName).process(cloudClient); + CollectionAdminRequest.reloadCollection(collectionName).process(cluster.getSolrClient()); - - resp = new CollectionAdminRequest.OverseerStatus().process(cloudClient).getResponse(); + resp = new CollectionAdminRequest.OverseerStatus().process(cluster.getSolrClient()).getResponse(); collection_operations = (NamedList) resp.get("collection_operations"); SimpleOrderedMap reload = (SimpleOrderedMap) collection_operations.get(CollectionParams.CollectionAction.RELOAD.toLower()); assertEquals("No stats for reload in OverseerCollectionProcessor", 1, reload.get("requests")); try { - new CollectionAdminRequest.SplitShard() - .setCollectionName("non_existent_collection") - .setShardName("non_existent_shard") - .process(cloudClient); + CollectionAdminRequest.splitShard("non_existent_collection") + .setShardName("non_existent_shard") + .process(cluster.getSolrClient()); fail("Split shard for non existent collection should have failed"); } catch (Exception e) { // expected because we did not correctly specify required params for split } - resp = new CollectionAdminRequest.OverseerStatus().process(cloudClient).getResponse(); + resp = new CollectionAdminRequest.OverseerStatus().process(cluster.getSolrClient()).getResponse(); collection_operations = (NamedList) resp.get("collection_operations"); SimpleOrderedMap split = (SimpleOrderedMap) collection_operations.get(CollectionParams.CollectionAction.SPLITSHARD.toLower()); assertEquals("No stats for split in OverseerCollectionProcessor", 1, split.get("errors")); @@ -111,6 +93,5 @@ public class OverseerStatusTest extends BasicDistributedZkTest { assertNotNull(updateState.get("errors")); assertNotNull(updateState.get("avgTimePerRequest")); - waitForThingsToLevelOut(15); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/RemoteQueryErrorTest.java b/solr/core/src/test/org/apache/solr/cloud/RemoteQueryErrorTest.java index 24f9696b732..54503bfe1ea 100644 --- a/solr/core/src/test/org/apache/solr/cloud/RemoteQueryErrorTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/RemoteQueryErrorTest.java @@ -16,58 +16,43 @@ */ package org.apache.solr.cloud; -import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; +import org.junit.BeforeClass; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - import static org.junit.internal.matchers.StringContains.containsString; /** * Verify that remote (proxied) queries return proper error messages */ -@Slow -public class RemoteQueryErrorTest extends AbstractFullDistribZkTestBase { +public class RemoteQueryErrorTest extends SolrCloudTestCase { - 
public RemoteQueryErrorTest() { - super(); - sliceCount = 1; - fixShardCount(random().nextBoolean() ? 3 : 4); + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(3) + .addConfig("conf", configset("cloud-minimal")) + .configure(); } + // TODO add test for CloudSolrClient as well + @Test public void test() throws Exception { - handle.clear(); - handle.put("timestamp", SKIPVAL); - - waitForThingsToLevelOut(15); - del("*:*"); - - createCollection("collection2", 2, 1, 10); - - List numShardsNumReplicaList = new ArrayList<>(2); - numShardsNumReplicaList.add(2); - numShardsNumReplicaList.add(1); - checkForCollection("collection2", numShardsNumReplicaList, null); - waitForRecoveriesToFinish("collection2", true); + CollectionAdminRequest.createCollection("collection", "conf", 2, 1).process(cluster.getSolrClient()); - for (SolrClient solrClient : clients) { - try { - SolrInputDocument emptyDoc = new SolrInputDocument(); - solrClient.add(emptyDoc); - fail("Expected unique key exception"); - } catch (SolrException ex) { - assertThat(ex.getMessage(), containsString("Document is missing mandatory uniqueKey field: id")); - } catch(Exception ex) { - fail("Expected a SolrException to occur, instead received: " + ex.getClass()); - } finally { - solrClient.close(); + for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { + try (SolrClient client = jetty.newClient()) { + SolrException e = expectThrows(SolrException.class, () -> { + client.add("collection", new SolrInputDocument()); + }); + assertThat(e.getMessage(), containsString("Document is missing mandatory uniqueKey field: id")); } } + } } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java b/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java index 01c444066f0..415d4e49b80 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java @@ -16,13 +16,15 @@ */ package org.apache.solr.cloud; -import java.io.IOException; import java.lang.invoke.MethodHandles; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.ShardParams; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,41 +36,47 @@ import static org.hamcrest.CoreMatchers.is; * and also asserts that a meaningful exception is thrown when shards.tolerant=false * See SOLR-7566 */ -public class TestDownShardTolerantSearch extends AbstractFullDistribZkTestBase { +public class TestDownShardTolerantSearch extends SolrCloudTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - public TestDownShardTolerantSearch() { - sliceCount = 2; + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", configset("cloud-minimal")) + .configure(); } @Test - @ShardsFixed(num = 2) public void searchingShouldFailWithoutTolerantSearchSetToTrue() throws Exception { - waitForRecoveriesToFinish(true); - indexAbunchOfDocs(); - commit(); - QueryResponse response = cloudClient.query(new SolrQuery("*:*").setRows(1)); + CollectionAdminRequest.createCollection("tolerant", "conf", 2, 1) + 
.process(cluster.getSolrClient()); + + UpdateRequest update = new UpdateRequest(); + for (int i = 0; i < 100; i++) { + update.add("id", Integer.toString(i)); + } + update.commit(cluster.getSolrClient(), "tolerant"); + + QueryResponse response = cluster.getSolrClient().query("tolerant", new SolrQuery("*:*").setRows(1)); assertThat(response.getStatus(), is(0)); - assertThat(response.getResults().getNumFound(), is(66L)); + assertThat(response.getResults().getNumFound(), is(100L)); - ChaosMonkey.kill(shardToJetty.get(SHARD1).get(0)); + cluster.stopJettySolrRunner(0); - response = cloudClient.query(new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, true)); + response = cluster.getSolrClient().query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, true)); assertThat(response.getStatus(), is(0)); assertTrue(response.getResults().getNumFound() > 0); try { - cloudClient.query(new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, false)); + cluster.getSolrClient().query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, false)); fail("Request should have failed because we killed shard1 jetty"); } catch (SolrServerException e) { log.info("error from server", e); assertNotNull(e.getCause()); assertTrue("Error message from server should have the name of the down shard", - e.getCause().getMessage().contains(SHARD1)); - } catch (IOException e) { - e.printStackTrace(); + e.getCause().getMessage().contains("shard")); } } } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestExclusionRuleCollectionAccess.java b/solr/core/src/test/org/apache/solr/cloud/TestExclusionRuleCollectionAccess.java index 9ef2dcd40f9..5bf77c1a71d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestExclusionRuleCollectionAccess.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestExclusionRuleCollectionAccess.java @@ -16,34 +16,32 @@ */ package org.apache.solr.cloud; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.junit.BeforeClass; import org.junit.Test; -@LuceneTestCase.Slow -public class TestExclusionRuleCollectionAccess extends AbstractFullDistribZkTestBase { +public class TestExclusionRuleCollectionAccess extends SolrCloudTestCase { - public TestExclusionRuleCollectionAccess() { - schemaString = "schema15.xml"; // we need a string id - sliceCount = 1; + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1) + .addConfig("conf", configset("cloud-minimal")) + .configure(); } @Test public void doTest() throws Exception { - CollectionAdminRequest.Create req = new CollectionAdminRequest.Create(); - req.setCollectionName("css33"); - req.setNumShards(1); - req.process(cloudClient); - - waitForRecoveriesToFinish("css33", false); - - try (SolrClient c = createCloudClient("css33")) { - c.add(getDoc("id", "1")); - c.commit(); - assertEquals("Should have returned 1 result", 1, c.query(params("q", "*:*", "collection", "css33")).getResults().getNumFound()); - } + CollectionAdminRequest.createCollection("css33", "conf", 1, 1).process(cluster.getSolrClient()); + + new UpdateRequest() + .add("id", "1") + .commit(cluster.getSolrClient(), "css33"); + + assertEquals("Should have returned 1 result", 1, + cluster.getSolrClient().query("css33", params("q", "*:*", "collection", "css33")).getResults().getNumFound()); + } } 
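The test conversions above all share the same SolrCloudTestCase shape: a static cluster configured once per class, collections created through the fluent CollectionAdminRequest helpers, and cluster-state assertions expressed as waitForState predicates rather than hand-rolled TimeOut/sleep loops. A minimal sketch of that pattern follows; the class, collection, and config names are placeholders, while the helpers themselves (configureCluster, configset, CollectionAdminRequest.createCollection, cluster.getSolrClient, waitForState) are the ones already exercised in these diffs.

package org.apache.solr.cloud;

import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.junit.BeforeClass;
import org.junit.Test;

public class ExampleCloudTest extends SolrCloudTestCase {

  @BeforeClass
  public static void setupCluster() throws Exception {
    // One MiniSolrCloudCluster per test class, loaded with a named configset.
    configureCluster(2)
        .addConfig("conf", configset("cloud-minimal"))
        .configure();
  }

  @Test
  public void testCreateAndWait() throws Exception {
    // Create a 1-shard, 2-replica collection through the fluent admin API...
    CollectionAdminRequest.createCollection("example", "conf", 1, 2)
        .process(cluster.getSolrClient());
    // ...then assert on cluster state with a predicate instead of polling in a loop.
    waitForState("Expected one active shard with two replicas", "example",
        (liveNodes, collectionState) -> collectionState.getSlice("shard1") != null
            && collectionState.getSlice("shard1").getReplicas().size() == 2);
  }
}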
diff --git a/solr/core/src/test/org/apache/solr/security/PKIAuthenticationIntegrationTest.java b/solr/core/src/test/org/apache/solr/security/PKIAuthenticationIntegrationTest.java index 30fe9338f72..bc4f4e5bd29 100644 --- a/solr/core/src/test/org/apache/solr/security/PKIAuthenticationIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/security/PKIAuthenticationIntegrationTest.java @@ -18,19 +18,21 @@ package org.apache.solr.security; import javax.servlet.ServletRequest; import javax.servlet.http.HttpServletRequest; - import java.lang.invoke.MethodHandles; import java.security.Principal; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; -import org.apache.solr.SolrTestCaseJ4; +import org.apache.http.client.HttpClient; import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.Utils; +import org.junit.After; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,27 +41,32 @@ import static java.util.Collections.singletonMap; import static org.apache.solr.common.util.Utils.makeMap; import static org.apache.solr.security.TestAuthorizationFramework.verifySecurityStatus; -@SolrTestCaseJ4.SuppressSSL -public class PKIAuthenticationIntegrationTest extends AbstractFullDistribZkTestBase { +public class PKIAuthenticationIntegrationTest extends SolrCloudTestCase { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - static final int TIMEOUT = 10000; + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", configset("cloud-minimal")) + .configure(); + } @Test public void testPkiAuth() throws Exception { - waitForThingsToLevelOut(10); + CollectionAdminRequest.createCollection("collection", "conf", 2, 1).process(cluster.getSolrClient()); + + // TODO make a SolrJ helper class for this byte[] bytes = Utils.toJSON(makeMap("authorization", singletonMap("class", MockAuthorizationPlugin.class.getName()), "authentication", singletonMap("class", MockAuthenticationPlugin.class.getName()))); + zkClient().setData(ZkStateReader.SOLR_SECURITY_CONF_PATH, bytes, true); - try (ZkStateReader zkStateReader = new ZkStateReader(zkServer.getZkAddress(), - TIMEOUT, TIMEOUT)) { - zkStateReader.getZkClient().setData(ZkStateReader.SOLR_SECURITY_CONF_PATH, bytes, true); - } - for (JettySolrRunner jetty : jettys) { + HttpClient httpClient = cluster.getSolrClient().getHttpClient(); + for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { String baseUrl = jetty.getBaseUrl().toString(); - verifySecurityStatus(cloudClient.getLbClient().getHttpClient(), baseUrl + "/admin/authorization", "authorization/class", MockAuthorizationPlugin.class.getName(), 20); - verifySecurityStatus(cloudClient.getLbClient().getHttpClient(), baseUrl + "/admin/authentication", "authentication.enabled", "true", 20); + verifySecurityStatus(httpClient, baseUrl + "/admin/authorization", "authorization/class", MockAuthorizationPlugin.class.getName(), 20); + verifySecurityStatus(httpClient, baseUrl + "/admin/authentication", "authentication.enabled", "true", 20); } 
log.info("Starting test"); ModifiableSolrParams params = new ModifiableSolrParams(); @@ -95,13 +102,12 @@ public class PKIAuthenticationIntegrationTest extends AbstractFullDistribZkTestB } }; QueryRequest query = new QueryRequest(params); - query.process(cloudClient); + query.process(cluster.getSolrClient(), "collection"); assertTrue("all nodes must get the user solr , no:of nodes got solr : " + count.get(),count.get() > 2); } - @Override + @After public void distribTearDown() throws Exception { - super.distribTearDown(); MockAuthenticationPlugin.predicate = null; MockAuthorizationPlugin.predicate = null; } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java index 0beaa55b644..94750c0acae 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java @@ -280,6 +280,8 @@ public abstract class CollectionAdminRequest public CollectionAdminRoleRequest(CollectionAction action, String node, String role) { super(action); + this.node = node; + this.role = role; } @Override From 87b6c2c8fcdc3a5f4adc3516f249af89b479d77a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Dec 2016 19:48:16 +0000 Subject: [PATCH 52/83] LUCENE-7607: FieldLeafComparator.setScorer() should throw IOException --- lucene/CHANGES.txt | 3 +++ .../apache/lucene/search/LeafFieldComparator.java | 2 +- .../lucene/search/SimpleFieldComparator.java | 2 +- .../solr/search/CollapsingQParserPlugin.java | 14 +++++++------- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 7a118f1d8c2..35314db9505 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -65,6 +65,9 @@ API Changes * LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries to be set to true when splitOnWhitespace is false (and vice-versa). +* LUCENE-7607: LeafFieldComparator.setScorer and SimpleFieldComparator.setScorer + are declared as throwing IOException (Alan Woodward) + New features * LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand) diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java index 60193768c39..677639cbf09 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java @@ -114,6 +114,6 @@ public interface LeafFieldComparator { * * @param scorer Scorer instance that you should use to * obtain the current hit's score, if necessary. 
*/ - void setScorer(Scorer scorer); + void setScorer(Scorer scorer) throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/search/SimpleFieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/SimpleFieldComparator.java index 3f1e6c2a7a1..a258e2489bd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SimpleFieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/SimpleFieldComparator.java @@ -38,5 +38,5 @@ public abstract class SimpleFieldComparator extends FieldComparator implem } @Override - public void setScorer(Scorer scorer) {} + public void setScorer(Scorer scorer) throws IOException {} } diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index f6bc18e1008..44aade5ac29 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -965,7 +965,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { @Override public boolean needsScores() { return needsScores || super.needsScores(); } - public void setScorer(Scorer scorer) { + public void setScorer(Scorer scorer) throws IOException { this.collapseStrategy.setScorer(scorer); } @@ -1147,7 +1147,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { @Override public boolean needsScores() { return needsScores || super.needsScores(); } - public void setScorer(Scorer scorer) { + public void setScorer(Scorer scorer) throws IOException { this.collapseStrategy.setScorer(scorer); } @@ -1523,7 +1523,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { return collapsedSet; } - public void setScorer(Scorer scorer) { + public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } @@ -1952,7 +1952,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { } @Override - public void setScorer(Scorer s) { + public void setScorer(Scorer s) throws IOException { super.setScorer(s); this.compareState.setScorer(s); } @@ -2100,7 +2100,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { return collapsedSet; } - public void setScorer(Scorer scorer) { + public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } @@ -2522,7 +2522,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { } @Override - public void setScorer(Scorer s) { + public void setScorer(Scorer s) throws IOException { super.setScorer(s); this.compareState.setScorer(s); } @@ -2673,7 +2673,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { leafFieldComparators[clause] = fieldComparators[clause].getLeafComparator(context); } } - public void setScorer(Scorer s) { + public void setScorer(Scorer s) throws IOException { for (int clause = 0; clause < numClauses; clause++) { leafFieldComparators[clause].setScorer(s); } From 3f24fd81c836982be96b9b60082b53177fffe504 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 28 Dec 2016 20:10:47 +0000 Subject: [PATCH 53/83] LUCENE-5325: Add LongValuesSource and DoubleValuesSource in core --- lucene/CHANGES.txt | 5 + .../apache/lucene/search/DoubleValues.java | 38 +++ .../lucene/search/DoubleValuesSource.java | 313 ++++++++++++++++++ .../org/apache/lucene/search/LongValues.java | 38 +++ .../lucene/search/LongValuesSource.java | 217 ++++++++++++ .../lucene/search/TestDoubleValuesSource.java | 158 +++++++++ .../lucene/search/TestLongValuesSource.java | 140 ++++++++ 7 files changed, 909 
insertions(+) create mode 100644 lucene/core/src/java/org/apache/lucene/search/DoubleValues.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/LongValues.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/LongValuesSource.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 35314db9505..12b615d6ac6 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -89,6 +89,11 @@ New features http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html for details. (Mike McCandless) +* LUCENE-5325: Added LongValuesSource and DoubleValuesSource, intended as + type-safe replacements for ValueSource in the queries module. These + expose per-segment LongValues or DoubleValues iterators, similar to the + existing DocValues iterator API. (Alan Woodward, Adrien Grand) + Bug Fixes * LUCENE-7547: JapaneseTokenizerFactory was failing to close the diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValues.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValues.java new file mode 100644 index 00000000000..4f12390ddf6 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValues.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +/** + * Per-segment, per-document double values, which can be calculated at search-time + */ +public abstract class DoubleValues { + + /** + * Get the double value for the current document + */ + public abstract double doubleValue() throws IOException; + + /** + * Advance this instance to the given document id + * @return true if there is a value for this document + */ + public abstract boolean advanceExact(int doc) throws IOException; + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java new file mode 100644 index 00000000000..4ac8fc164ac --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; +import java.util.function.DoubleToLongFunction; +import java.util.function.LongToDoubleFunction; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; + +/** + * Base class for producing {@link DoubleValues} + * + * To obtain a {@link DoubleValues} object for a leaf reader, clients should + * call {@link #getValues(LeafReaderContext, DoubleValues)}. + * + * DoubleValuesSource objects for NumericDocValues fields can be obtained by calling + * {@link #fromDoubleField(String)}, {@link #fromFloatField(String)}, {@link #fromIntField(String)} + * or {@link #fromLongField(String)}, or from {@link #fromField(String, LongToDoubleFunction)} if + * special long-to-double encoding is required. + * + * Scores may be used as a source for value calculations by wrapping a {@link Scorer} using + * {@link #fromScorer(Scorer)} and passing the resulting DoubleValues to {@link #getValues(LeafReaderContext, DoubleValues)}. + * The scores can then be accessed using the {@link #SCORES} DoubleValuesSource. + */ +public abstract class DoubleValuesSource { + + /** + * Returns a {@link DoubleValues} instance for the passed-in LeafReaderContext and scores + * + * If scores are not needed to calculate the values (ie {@link #needsScores() returns false}, callers + * may safely pass {@code null} for the {@code scores} parameter. 
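+ *
+ * A minimal sketch of reading values for one segment; the field name "price" and the
+ * {@code leafCtx} / {@code docId} variables are illustrative, not part of this patch:
+ * <pre>
+ *   DoubleValuesSource vs = DoubleValuesSource.fromDoubleField("price");
+ *   DoubleValues values = vs.getValues(leafCtx, null); // null is safe here: field-backed sources don't need scores
+ *   if (values.advanceExact(docId)) {
+ *     double v = values.doubleValue();
+ *   }
+ * </pre>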
+ */ + public abstract DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException; + + /** + * Return true if document scores are needed to calculate values + */ + public abstract boolean needsScores(); + + /** + * Create a sort field based on the value of this producer + * @param reverse true if the sort should be decreasing + */ + public SortField getSortField(boolean reverse) { + return new DoubleValuesSortField(this, reverse); + } + + /** + * Convert to a LongValuesSource by casting the double values to longs + */ + public final LongValuesSource toLongValuesSource() { + return new LongValuesSource() { + @Override + public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + DoubleValues in = DoubleValuesSource.this.getValues(ctx, scores); + return new LongValues() { + @Override + public long longValue() throws IOException { + return (long) in.doubleValue(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + return in.advanceExact(doc); + } + }; + } + + @Override + public boolean needsScores() { + return DoubleValuesSource.this.needsScores(); + } + }; + } + + /** + * Creates a DoubleValuesSource that wraps a generic NumericDocValues field + * + * @param field the field to wrap, must have NumericDocValues + * @param decoder a function to convert the long-valued doc values to doubles + */ + public static DoubleValuesSource fromField(String field, LongToDoubleFunction decoder) { + return new FieldValuesSource(field, decoder); + } + + /** + * Creates a DoubleValuesSource that wraps a double-valued field + */ + public static DoubleValuesSource fromDoubleField(String field) { + return fromField(field, Double::longBitsToDouble); + } + + /** + * Creates a DoubleValuesSource that wraps a float-valued field + */ + public static DoubleValuesSource fromFloatField(String field) { + return fromField(field, (v) -> (double)Float.intBitsToFloat((int)v)); + } + + /** + * Creates a DoubleValuesSource that wraps a long-valued field + */ + public static DoubleValuesSource fromLongField(String field) { + return fromField(field, (v) -> (double) v); + } + + /** + * Creates a DoubleValuesSource that wraps an int-valued field + */ + public static DoubleValuesSource fromIntField(String field) { + return fromLongField(field); + } + + /** + * A DoubleValuesSource that exposes a document's score + * + * If this source is used as part of a values calculation, then callers must not + * pass {@code null} as the {@link DoubleValues} parameter on {@link #getValues(LeafReaderContext, DoubleValues)} + */ + public static final DoubleValuesSource SCORES = new DoubleValuesSource() { + @Override + public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + assert scores != null; + return scores; + } + + @Override + public boolean needsScores() { + return true; + } + }; + + /** + * Returns a DoubleValues instance that wraps scores returned by a Scorer + */ + public static DoubleValues fromScorer(Scorer scorer) { + return new DoubleValues() { + @Override + public double doubleValue() throws IOException { + return scorer.score(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + assert scorer.docID() == doc; + return true; + } + }; + } + + private static class FieldValuesSource extends DoubleValuesSource { + + final String field; + final LongToDoubleFunction decoder; + + private FieldValuesSource(String field, LongToDoubleFunction decoder) { + this.field = field; + 
this.decoder = decoder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldValuesSource that = (FieldValuesSource) o; + return Objects.equals(field, that.field) && + Objects.equals(decoder, that.decoder); + } + + @Override + public int hashCode() { + return Objects.hash(field, decoder); + } + + @Override + public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + final NumericDocValues values = DocValues.getNumeric(ctx.reader(), field); + return toDoubleValues(values, decoder::applyAsDouble); + } + + @Override + public boolean needsScores() { + return false; + } + } + + private static class DoubleValuesSortField extends SortField { + + final DoubleValuesSource producer; + + public DoubleValuesSortField(DoubleValuesSource producer, boolean reverse) { + super(producer.toString(), new DoubleValuesComparatorSource(producer), reverse); + this.producer = producer; + } + + @Override + public boolean needsScores() { + return producer.needsScores(); + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder("<"); + buffer.append(getField()).append(">"); + if (reverse) + buffer.append("!"); + return buffer.toString(); + } + + } + + private static class DoubleValuesHolder { + DoubleValues values; + } + + private static class DoubleValuesComparatorSource extends FieldComparatorSource { + private final DoubleValuesSource producer; + + public DoubleValuesComparatorSource(DoubleValuesSource producer) { + this.producer = producer; + } + + @Override + public FieldComparator newComparator(String fieldname, int numHits, + int sortPos, boolean reversed) throws IOException { + return new FieldComparator.DoubleComparator(numHits, fieldname, 0.0){ + + LeafReaderContext ctx; + DoubleValuesHolder holder = new DoubleValuesHolder(); + + @Override + protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { + ctx = context; + return asNumericDocValues(holder, Double::doubleToLongBits); + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + holder.values = producer.getValues(ctx, fromScorer(scorer)); + } + }; + } + } + + private static DoubleValues toDoubleValues(NumericDocValues in, LongToDoubleFunction map) { + return new DoubleValues() { + @Override + public double doubleValue() throws IOException { + return map.applyAsDouble(in.longValue()); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return in.advanceExact(target); + } + + }; + } + + private static NumericDocValues asNumericDocValues(DoubleValuesHolder in, DoubleToLongFunction converter) { + return new NumericDocValues() { + @Override + public long longValue() throws IOException { + return converter.applyAsLong(in.values.doubleValue()); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return in.values.advanceExact(target); + } + + @Override + public int docID() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextDoc() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + throw new UnsupportedOperationException(); + } + }; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/LongValues.java 
b/lucene/core/src/java/org/apache/lucene/search/LongValues.java new file mode 100644 index 00000000000..54d3189c724 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/LongValues.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +/** + * Per-segment, per-document long values, which can be calculated at search-time + */ +public abstract class LongValues { + + /** + * Get the long value for the current document + */ + public abstract long longValue() throws IOException; + + /** + * Advance this instance to the given document id + * @return true if there is a value for this document + */ + public abstract boolean advanceExact(int doc) throws IOException; + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/LongValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/LongValuesSource.java new file mode 100644 index 00000000000..9d00355f023 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/LongValuesSource.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Objects; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; + +/** + * Base class for producing {@link LongValues} + * + * To obtain a {@link LongValues} object for a leaf reader, clients should + * call {@link #getValues(LeafReaderContext, DoubleValues)}. + * + * LongValuesSource objects for long and int-valued NumericDocValues fields can + * be obtained by calling {@link #fromLongField(String)} and {@link #fromIntField(String)}. + * + * To obtain a LongValuesSource from a float or double-valued NumericDocValues field, + * use {@link DoubleValuesSource#fromFloatField(String)} or {@link DoubleValuesSource#fromDoubleField(String)} + * and then call {@link DoubleValuesSource#toLongValuesSource()}. 
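+ *
+ * A minimal sorting sketch; the field name "popularity" and the searcher/query variables are
+ * illustrative, not part of this patch:
+ * <pre>
+ *   LongValuesSource source = LongValuesSource.fromLongField("popularity");
+ *   Sort sort = new Sort(source.getSortField(true));   // reverse=true sorts descending
+ *   TopDocs hits = searcher.search(query, 10, sort);
+ * </pre>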
+ */ +public abstract class LongValuesSource { + + /** + * Returns a {@link LongValues} instance for the passed-in LeafReaderContext and scores + * + * If scores are not needed to calculate the values (ie {@link #needsScores() returns false}, callers + * may safely pass {@code null} for the {@code scores} parameter. + */ + public abstract LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException; + + /** + * Return true if document scores are needed to calculate values + */ + public abstract boolean needsScores(); + + /** + * Create a sort field based on the value of this producer + * @param reverse true if the sort should be decreasing + */ + public SortField getSortField(boolean reverse) { + return new LongValuesSortField(this, reverse); + } + + /** + * Creates a LongValuesSource that wraps a long-valued field + */ + public static LongValuesSource fromLongField(String field) { + return new FieldValuesSource(field); + } + + /** + * Creates a LongValuesSource that wraps an int-valued field + */ + public static LongValuesSource fromIntField(String field) { + return fromLongField(field); + } + + private static class FieldValuesSource extends LongValuesSource { + + final String field; + + private FieldValuesSource(String field) { + this.field = field; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldValuesSource that = (FieldValuesSource) o; + return Objects.equals(field, that.field); + } + + @Override + public int hashCode() { + return Objects.hash(field); + } + + @Override + public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + final NumericDocValues values = DocValues.getNumeric(ctx.reader(), field); + return toLongValues(values); + } + + @Override + public boolean needsScores() { + return false; + } + } + + private static class LongValuesSortField extends SortField { + + final LongValuesSource producer; + + public LongValuesSortField(LongValuesSource producer, boolean reverse) { + super(producer.toString(), new LongValuesComparatorSource(producer), reverse); + this.producer = producer; + } + + @Override + public boolean needsScores() { + return producer.needsScores(); + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder("<"); + buffer.append(getField()).append(">"); + if (reverse) + buffer.append("!"); + return buffer.toString(); + } + + } + + private static class LongValuesHolder { + LongValues values; + } + + private static class LongValuesComparatorSource extends FieldComparatorSource { + private final LongValuesSource producer; + + public LongValuesComparatorSource(LongValuesSource producer) { + this.producer = producer; + } + + @Override + public FieldComparator newComparator(String fieldname, int numHits, + int sortPos, boolean reversed) throws IOException { + return new FieldComparator.LongComparator(numHits, fieldname, 0L){ + + LeafReaderContext ctx; + LongValuesHolder holder = new LongValuesHolder(); + + @Override + protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { + ctx = context; + return asNumericDocValues(holder); + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + holder.values = producer.getValues(ctx, DoubleValuesSource.fromScorer(scorer)); + } + }; + } + } + + private static LongValues toLongValues(NumericDocValues in) { + return new LongValues() { + @Override + public long 
longValue() throws IOException { + return in.longValue(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return in.advanceExact(target); + } + + }; + } + + private static NumericDocValues asNumericDocValues(LongValuesHolder in) { + return new NumericDocValues() { + @Override + public long longValue() throws IOException { + return in.values.longValue(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return in.values.advanceExact(target); + } + + @Override + public int docID() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextDoc() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + throw new UnsupportedOperationException(); + } + }; + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java b/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java new file mode 100644 index 00000000000..38609630587 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.util.Arrays; +import java.util.Collections; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestDoubleValuesSource extends LuceneTestCase { + + private Directory dir; + private IndexReader reader; + private IndexSearcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + int numDocs = TestUtil.nextInt(random(), 2049, 4000); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(newTextField("english", English.intToEnglish(i), Field.Store.NO)); + document.add(newTextField("oddeven", (i % 2 == 0) ? 
"even" : "odd", Field.Store.NO)); + document.add(new NumericDocValuesField("int", random().nextInt())); + document.add(new NumericDocValuesField("long", random().nextLong())); + document.add(new FloatDocValuesField("float", random().nextFloat())); + document.add(new DoubleDocValuesField("double", random().nextDouble())); + iw.addDocument(document); + } + reader = iw.getReader(); + iw.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + super.tearDown(); + } + + public void testSimpleFieldEquivalences() throws Exception { + checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("int", SortField.Type.INT, random().nextBoolean()))); + checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("long", SortField.Type.LONG, random().nextBoolean()))); + checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("float", SortField.Type.FLOAT, random().nextBoolean()))); + checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("double", SortField.Type.DOUBLE, random().nextBoolean()))); + } + + public void testHashCodeAndEquals() { + DoubleValuesSource vs1 = DoubleValuesSource.fromDoubleField("double"); + DoubleValuesSource vs2 = DoubleValuesSource.fromDoubleField("double"); + assertEquals(vs1, vs2); + assertEquals(vs1.hashCode(), vs2.hashCode()); + DoubleValuesSource v3 = DoubleValuesSource.fromLongField("long"); + assertFalse(vs1.equals(v3)); + } + + public void testSimpleFieldSortables() throws Exception { + int n = atLeast(4); + for (int i = 0; i < n; i++) { + Sort sort = randomSort(); + checkSorts(new MatchAllDocsQuery(), sort); + checkSorts(new TermQuery(new Term("english", "one")), sort); + } + } + + Sort randomSort() throws Exception { + boolean reversed = random().nextBoolean(); + SortField fields[] = new SortField[] { + new SortField("int", SortField.Type.INT, reversed), + new SortField("long", SortField.Type.LONG, reversed), + new SortField("float", SortField.Type.FLOAT, reversed), + new SortField("double", SortField.Type.DOUBLE, reversed), + new SortField("score", SortField.Type.SCORE) + }; + Collections.shuffle(Arrays.asList(fields), random()); + int numSorts = TestUtil.nextInt(random(), 1, fields.length); + return new Sort(Arrays.copyOfRange(fields, 0, numSorts)); + } + + // Take a Sort, and replace any field sorts with Sortables + Sort convertSortToSortable(Sort sort) { + SortField original[] = sort.getSort(); + SortField mutated[] = new SortField[original.length]; + for (int i = 0; i < mutated.length; i++) { + if (random().nextInt(3) > 0) { + SortField s = original[i]; + boolean reverse = s.getType() == SortField.Type.SCORE || s.getReverse(); + switch (s.getType()) { + case INT: + mutated[i] = DoubleValuesSource.fromIntField(s.getField()).getSortField(reverse); + break; + case LONG: + mutated[i] = DoubleValuesSource.fromLongField(s.getField()).getSortField(reverse); + break; + case FLOAT: + mutated[i] = DoubleValuesSource.fromFloatField(s.getField()).getSortField(reverse); + break; + case DOUBLE: + mutated[i] = DoubleValuesSource.fromDoubleField(s.getField()).getSortField(reverse); + break; + case SCORE: + mutated[i] = DoubleValuesSource.SCORES.getSortField(reverse); + break; + default: + mutated[i] = original[i]; + } + } else { + mutated[i] = original[i]; + } + } + + return new Sort(mutated); + } + + void checkSorts(Query query, Sort sort) throws Exception { + int size = TestUtil.nextInt(random(), 1, searcher.getIndexReader().maxDoc() / 5); + TopDocs expected = searcher.search(query, size, 
sort, random().nextBoolean(), random().nextBoolean()); + Sort mutatedSort = convertSortToSortable(sort); + TopDocs actual = searcher.search(query, size, mutatedSort, random().nextBoolean(), random().nextBoolean()); + + CheckHits.checkEqual(query, expected.scoreDocs, actual.scoreDocs); + + if (size < actual.totalHits) { + expected = searcher.searchAfter(expected.scoreDocs[size-1], query, size, sort); + actual = searcher.searchAfter(actual.scoreDocs[size-1], query, size, mutatedSort); + CheckHits.checkEqual(query, expected.scoreDocs, actual.scoreDocs); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java b/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java new file mode 100644 index 00000000000..9148ad5b544 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.util.Arrays; +import java.util.Collections; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestLongValuesSource extends LuceneTestCase { + + private Directory dir; + private IndexReader reader; + private IndexSearcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + int numDocs = TestUtil.nextInt(random(), 2049, 4000); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(newTextField("english", English.intToEnglish(i), Field.Store.NO)); + document.add(newTextField("oddeven", (i % 2 == 0) ? 
"even" : "odd", Field.Store.NO)); + document.add(new NumericDocValuesField("int", random().nextInt())); + document.add(new NumericDocValuesField("long", random().nextLong())); + iw.addDocument(document); + } + reader = iw.getReader(); + iw.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + super.tearDown(); + } + + public void testSimpleFieldEquivalences() throws Exception { + checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("int", SortField.Type.INT, random().nextBoolean()))); + checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("long", SortField.Type.LONG, random().nextBoolean()))); + } + + public void testHashCodeAndEquals() { + LongValuesSource vs1 = LongValuesSource.fromLongField("long"); + LongValuesSource vs2 = LongValuesSource.fromLongField("long"); + assertEquals(vs1, vs2); + assertEquals(vs1.hashCode(), vs2.hashCode()); + LongValuesSource v3 = LongValuesSource.fromLongField("int"); + assertFalse(vs1.equals(v3)); + } + + public void testSimpleFieldSortables() throws Exception { + int n = atLeast(4); + for (int i = 0; i < n; i++) { + Sort sort = randomSort(); + checkSorts(new MatchAllDocsQuery(), sort); + checkSorts(new TermQuery(new Term("english", "one")), sort); + } + } + + Sort randomSort() throws Exception { + boolean reversed = random().nextBoolean(); + SortField fields[] = new SortField[] { + new SortField("int", SortField.Type.INT, reversed), + new SortField("long", SortField.Type.LONG, reversed) + }; + Collections.shuffle(Arrays.asList(fields), random()); + int numSorts = TestUtil.nextInt(random(), 1, fields.length); + return new Sort(Arrays.copyOfRange(fields, 0, numSorts)); + } + + // Take a Sort, and replace any field sorts with Sortables + Sort convertSortToSortable(Sort sort) { + SortField original[] = sort.getSort(); + SortField mutated[] = new SortField[original.length]; + for (int i = 0; i < mutated.length; i++) { + if (random().nextInt(3) > 0) { + SortField s = original[i]; + boolean reverse = s.getType() == SortField.Type.SCORE || s.getReverse(); + switch (s.getType()) { + case INT: + mutated[i] = LongValuesSource.fromIntField(s.getField()).getSortField(reverse); + break; + case LONG: + mutated[i] = LongValuesSource.fromLongField(s.getField()).getSortField(reverse); + break; + default: + mutated[i] = original[i]; + } + } else { + mutated[i] = original[i]; + } + } + + return new Sort(mutated); + } + + void checkSorts(Query query, Sort sort) throws Exception { + int size = TestUtil.nextInt(random(), 1, searcher.getIndexReader().maxDoc() / 5); + Sort mutatedSort = convertSortToSortable(sort); + TopDocs actual = searcher.search(query, size, mutatedSort, random().nextBoolean(), random().nextBoolean()); + TopDocs expected = searcher.search(query, size, sort, random().nextBoolean(), random().nextBoolean()); + + CheckHits.checkEqual(query, expected.scoreDocs, actual.scoreDocs); + + if (size < actual.totalHits) { + expected = searcher.searchAfter(expected.scoreDocs[size-1], query, size, sort); + actual = searcher.searchAfter(actual.scoreDocs[size-1], query, size, mutatedSort); + CheckHits.checkEqual(query, expected.scoreDocs, actual.scoreDocs); + } + } +} From db9190db9372ae88a7392a7186397441ce070a96 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Thu, 29 Dec 2016 20:31:47 +0100 Subject: [PATCH 54/83] LUCENE-7595: Fix bug with RamUsageTester incorrectly handling Iterables outside Java Runtime --- .../src/java/org/apache/lucene/util/RamUsageTester.java | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java index daf81a96b35..6437d8eda12 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java @@ -152,7 +152,7 @@ public final class RamUsageTester { } boolean needsReflection = true; - if (Constants.JRE_IS_MINIMUM_JAVA9) { + if (Constants.JRE_IS_MINIMUM_JAVA9 && obClazz.getName().startsWith("java.")) { // Java 9: Best guess for some known types, as we cannot precisely look into runtime classes: final ToLongFunction func = SIMPLE_TYPES.get(obClazz); if (func != null) { // some simple type like String where the size is easy to get from public properties From 7dcb557ab73da7fb7af0e8f698895e28dde4bbca Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Thu, 29 Dec 2016 13:46:04 -0500 Subject: [PATCH 55/83] SOLR-9905: Add NullStream to isolate the performance of the ExportWriter --- .../apache/solr/handler/StreamHandler.java | 2 +- .../client/solrj/io/stream/NullStream.java | 155 ++++++++++++++++++ .../solrj/io/stream/StreamExpressionTest.java | 65 ++++++++ 3 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NullStream.java diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index c6f3c62e155..13ce6365457 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -139,7 +139,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("classify", ClassifyStream.class) .withFunctionName("fetch", FetchStream.class) .withFunctionName("executor", ExecutorStream.class) - + .withFunctionName("null", NullStream.class) // metrics .withFunctionName("min", MinMetric.class) .withFunctionName("max", MaxMetric.class) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NullStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NullStream.java new file mode 100644 index 00000000000..bef3b1cb621 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NullStream.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.client.solrj.io.stream; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Date; + +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.comp.StreamComparator; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + + +/** + * The NullStream Iterates over a TupleStream and eats the tuples. It returns the tuple count in the EOF tuple. + * Because the NullStreaam eats all the Tuples it see's it can be used as a simple tool for performance analysis of + * underlying streams. + **/ + +public class NullStream extends TupleStream implements Expressible { + + private static final long serialVersionUID = 1; + + private TupleStream stream; + private long count; + private long start; + private Tuple eof; + + public NullStream(TupleStream tupleStream) throws IOException { + init(tupleStream); + } + + public NullStream(StreamExpression expression, StreamFactory factory) throws IOException { + // grab all parameters out + List streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class); + TupleStream stream = factory.constructStream(streamExpressions.get(0)); + + init(stream); + } + + private void init(TupleStream tupleStream) throws IOException{ + this.stream = tupleStream; + } + + @Override + public StreamExpression toExpression(StreamFactory factory) throws IOException{ + return toExpression(factory, true); + } + + private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException { + // function name + StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass())); + + if(includeStreams){ + // stream + if(stream instanceof Expressible){ + expression.addParameter(((Expressible)stream).toExpression(factory)); + } + else{ + throw new IOException("This RankStream contains a non-expressible TupleStream - it cannot be converted to an expression"); + } + } + else{ + expression.addParameter(""); + } + + return expression; + } + + @Override + public Explanation toExplanation(StreamFactory factory) throws IOException { + + return new StreamExplanation(getStreamNodeId().toString()) + .withChildren(new Explanation[]{ + stream.toExplanation(factory) + }) + .withFunctionName(factory.getFunctionName(this.getClass())) + .withImplementingClass(this.getClass().getName()) + .withExpressionType(ExpressionType.STREAM_DECORATOR) + .withExpression(toExpression(factory, false).toString()); + } + + public void setStreamContext(StreamContext context) { + this.stream.setStreamContext(context); + } + + public List children() { + List l = new ArrayList(); + l.add(stream); + return l; + } + + public void open() throws IOException { + start = new Date().getTime(); + count = 0; + stream.open(); + } + + public void close() throws IOException { + stream.close(); + } + + public Tuple read() throws IOException { + + if(eof != null) { + return eof; + } + + while(true) { + Tuple tuple = stream.read(); + if(tuple.EOF) { + eof = tuple; + long end = new Date().getTime(); + Tuple t = new Tuple(new HashMap()); + 
t.put("nullCount", count); + t.put("timer", end-start); + return t; + } else { + ++count; + } + } + } + + /** Return the stream sort - ie, the order in which records are returned */ + public StreamComparator getStreamSort(){ + return stream.getStreamSort(); + } + + public int getCost() { + return 0; + } + + +} \ No newline at end of file diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index 7d48c0e239f..0c9d5b3aadf 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -373,6 +373,71 @@ public class StreamExpressionTest extends SolrCloudTestCase { } + + @Test + public void testNullStream() throws Exception { + + new UpdateRequest() + .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0") + .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0") + .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3") + .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4") + .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1") + .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2") + .commit(cluster.getSolrClient(), COLLECTIONORALIAS); + + StreamExpression expression; + TupleStream stream; + List tuples; + + StreamFactory factory = new StreamFactory() + .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()) + .withFunctionName("search", CloudSolrStream.class) + .withFunctionName("null", NullStream.class); + + // Basic test + stream = factory.constructStream("null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")"); + tuples = getTuples(stream); + assertTrue(tuples.size() == 1); + assertTrue(tuples.get(0).getLong("nullCount") == 6); + } + + + @Test + public void testParallelNullStream() throws Exception { + + new UpdateRequest() + .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0") + .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0") + .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3") + .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4") + .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1") + .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2") + .commit(cluster.getSolrClient(), COLLECTIONORALIAS); + + StreamExpression expression; + TupleStream stream; + List tuples; + + StreamFactory factory = new StreamFactory() + .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()) + .withFunctionName("search", CloudSolrStream.class) + .withFunctionName("null", NullStream.class) + .withFunctionName("parallel", ParallelStream.class); + + // Basic test + stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"nullCount desc\", null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), by=\"a_i asc\"))"); + tuples = getTuples(stream); + assertTrue(tuples.size() == 2); + long nullCount = 0; + for(Tuple t : tuples) { + nullCount += t.getLong("nullCount"); + } + + assertEquals(nullCount, 6L); + } + + @Test public void testNulls() throws Exception { From 00723827ff5ad5c129d3d8487d2c64469ea03239 Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Thu, 29 Dec 2016 14:42:31 -0500 Subject: [PATCH 56/83] SOLR-9905: Update CHANGES.txt --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 
138385940ea..b509e236001 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -202,6 +202,8 @@ New Features * SOLR-9897: Add hl.requireFieldMatch toggle support when using the UnifiedHighlighter. Defaults to false like the other highlighters that support this. (David Smiley) +* SOLR-9905: Add NullStream to isolate the performance of the ExportWriter (Joel Bernstein) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have From a4335c0e9f01275c7d6e807813d9818b6e59d76e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 29 Dec 2016 14:44:11 +0000 Subject: [PATCH 57/83] LUCENE-5325: Add test for missing values in sorts --- .../org/apache/lucene/search/TestDoubleValuesSource.java | 9 +++++++++ .../org/apache/lucene/search/TestLongValuesSource.java | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java b/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java index 38609630587..13a5168c924 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDoubleValuesSource.java @@ -53,6 +53,8 @@ public class TestDoubleValuesSource extends LuceneTestCase { document.add(new NumericDocValuesField("long", random().nextLong())); document.add(new FloatDocValuesField("float", random().nextFloat())); document.add(new DoubleDocValuesField("double", random().nextDouble())); + if (i == 545) + document.add(new DoubleDocValuesField("onefield", 45.72)); iw.addDocument(document); } reader = iw.getReader(); @@ -67,6 +69,13 @@ public class TestDoubleValuesSource extends LuceneTestCase { super.tearDown(); } + public void testSortMissing() throws Exception { + DoubleValuesSource onefield = DoubleValuesSource.fromDoubleField("onefield"); + TopDocs results = searcher.search(new MatchAllDocsQuery(), 1, new Sort(onefield.getSortField(true))); + FieldDoc first = (FieldDoc) results.scoreDocs[0]; + assertEquals(45.72, first.fields[0]); + } + public void testSimpleFieldEquivalences() throws Exception { checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("int", SortField.Type.INT, random().nextBoolean()))); checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("long", SortField.Type.LONG, random().nextBoolean()))); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java b/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java index 9148ad5b544..8b20be5169d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLongValuesSource.java @@ -49,6 +49,8 @@ public class TestLongValuesSource extends LuceneTestCase { document.add(newTextField("oddeven", (i % 2 == 0) ? 
"even" : "odd", Field.Store.NO)); document.add(new NumericDocValuesField("int", random().nextInt())); document.add(new NumericDocValuesField("long", random().nextLong())); + if (i == 545) + document.add(new NumericDocValuesField("onefield", 45)); iw.addDocument(document); } reader = iw.getReader(); @@ -63,6 +65,13 @@ public class TestLongValuesSource extends LuceneTestCase { super.tearDown(); } + public void testSortMissing() throws Exception { + LongValuesSource onefield = LongValuesSource.fromLongField("onefield"); + TopDocs results = searcher.search(new MatchAllDocsQuery(), 1, new Sort(onefield.getSortField(true))); + FieldDoc first = (FieldDoc) results.scoreDocs[0]; + assertEquals(45L, first.fields[0]); + } + public void testSimpleFieldEquivalences() throws Exception { checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("int", SortField.Type.INT, random().nextBoolean()))); checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("long", SortField.Type.LONG, random().nextBoolean()))); From 93fdc20736d6e13736aceb091ab978bd8e03fcbb Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Thu, 29 Dec 2016 15:51:37 -0500 Subject: [PATCH 58/83] LUCENE-7564: Force single-threaded access to the AnalyzingInfixSuggester's SearcherManager when performing an acquire() or reassigning. This fixes failures in AnalyzingInfixSuggester.testRandomNRT(). --- .../analyzing/AnalyzingInfixSuggester.java | 138 +++++++++++------- 1 file changed, 82 insertions(+), 56 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index b8c2dbdafb3..2fbe4a81507 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -136,6 +136,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { /** {@link IndexSearcher} used for lookups. */ protected SearcherManager searcherMgr; + + protected final Object searcherMgrLock = new Object(); /** Default minimum number of leading characters before * PrefixQuery is used (4). */ @@ -275,53 +277,55 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { @Override public void build(InputIterator iter) throws IOException { - if (searcherMgr != null) { - searcherMgr.close(); - searcherMgr = null; - } - - if (writer != null) { - writer.close(); - writer = null; - } - - boolean success = false; - try { - // First pass: build a temporary normal Lucene index, - // just indexing the suggestions as they iterate: - writer = new IndexWriter(dir, - getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE)); - //long t0 = System.nanoTime(); - - // TODO: use threads? 
- BytesRef text; - while ((text = iter.next()) != null) { - BytesRef payload; - if (iter.hasPayloads()) { - payload = iter.payload(); - } else { - payload = null; - } - - add(text, iter.contexts(), iter.weight(), payload); + synchronized (searcherMgrLock) { + if (searcherMgr != null) { + searcherMgr.close(); + searcherMgr = null; } - //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec"); - if (commitOnBuild || closeIndexWriterOnBuild) { - commit(); + if (writer != null) { + writer.close(); + writer = null; } - searcherMgr = new SearcherManager(writer, null); - success = true; - } finally { - if (success) { - if (closeIndexWriterOnBuild) { - writer.close(); - writer = null; + + boolean success = false; + try { + // First pass: build a temporary normal Lucene index, + // just indexing the suggestions as they iterate: + writer = new IndexWriter(dir, + getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE)); + //long t0 = System.nanoTime(); + + // TODO: use threads? + BytesRef text; + while ((text = iter.next()) != null) { + BytesRef payload; + if (iter.hasPayloads()) { + payload = iter.payload(); + } else { + payload = null; + } + + add(text, iter.contexts(), iter.weight(), payload); } - } else { // failure - if (writer != null) { - writer.rollback(); - writer = null; + + //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec"); + if (commitOnBuild || closeIndexWriterOnBuild) { + commit(); + } + searcherMgr = new SearcherManager(writer, null); + success = true; + } finally { + if (success) { + if (closeIndexWriterOnBuild) { + writer.close(); + writer = null; + } + } else { // failure + if (writer != null) { + writer.rollback(); + writer = null; + } } } } @@ -369,10 +373,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } else { writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE)); } - SearcherManager oldSearcherMgr = searcherMgr; - searcherMgr = new SearcherManager(writer, null); - if (oldSearcherMgr != null) { - oldSearcherMgr.close(); + synchronized (searcherMgrLock) { + SearcherManager oldSearcherMgr = searcherMgr; + searcherMgr = new SearcherManager(writer, null); + if (oldSearcherMgr != null) { + oldSearcherMgr.close(); + } } } } @@ -642,7 +648,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { // only retrieve the first num hits now: Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num); List results = null; - IndexSearcher searcher = searcherMgr.acquire(); + SearcherManager mgr; + IndexSearcher searcher; + synchronized (searcherMgrLock) { + mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference + searcher = mgr.acquire(); + } try { //System.out.println("got searcher=" + searcher); searcher.search(finalQuery, c2); @@ -653,7 +664,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { // hits = searcher.search(query, null, num, SORT); results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken); } finally { - searcherMgr.release(searcher); + mgr.release(searcher); } //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest"); @@ -853,7 +864,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { long mem = RamUsageEstimator.shallowSizeOf(this); try { if (searcherMgr != null) { - IndexSearcher searcher = searcherMgr.acquire(); + 
SearcherManager mgr; + IndexSearcher searcher; + synchronized (searcherMgrLock) { + mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference + searcher = mgr.acquire(); + } try { for (LeafReaderContext context : searcher.getIndexReader().leaves()) { LeafReader reader = FilterLeafReader.unwrap(context.reader()); @@ -862,7 +878,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } } } finally { - searcherMgr.release(searcher); + mgr.release(searcher); } } return mem; @@ -876,7 +892,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { List resources = new ArrayList<>(); try { if (searcherMgr != null) { - IndexSearcher searcher = searcherMgr.acquire(); + SearcherManager mgr; + IndexSearcher searcher; + synchronized (searcherMgrLock) { + mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference + searcher = mgr.acquire(); + } try { for (LeafReaderContext context : searcher.getIndexReader().leaves()) { LeafReader reader = FilterLeafReader.unwrap(context.reader()); @@ -885,7 +906,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } } } finally { - searcherMgr.release(searcher); + mgr.release(searcher); } } return Collections.unmodifiableList(resources); @@ -899,11 +920,16 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { if (searcherMgr == null) { return 0; } - IndexSearcher searcher = searcherMgr.acquire(); + SearcherManager mgr; + IndexSearcher searcher; + synchronized (searcherMgrLock) { + mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference + searcher = mgr.acquire(); + } try { return searcher.getIndexReader().numDocs(); } finally { - searcherMgr.release(searcher); + mgr.release(searcher); } } -}; +} From 6b00ee5175d55d2f2a25ce6539dc12277022c898 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Thu, 29 Dec 2016 16:08:35 -0500 Subject: [PATCH 59/83] LUCENE-7564: add missing javadocs --- .../lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 2fbe4a81507..81880d4e913 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -137,6 +137,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { /** {@link IndexSearcher} used for lookups. 
*/ protected SearcherManager searcherMgr; + /** Used to manage concurrent access to searcherMgr */ protected final Object searcherMgrLock = new Object(); /** Default minimum number of leading characters before From 5d042d3a49dfcf654b8bf8a96521d5404bfd3a7b Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Fri, 30 Dec 2016 00:42:51 +0300 Subject: [PATCH 60/83] SOLR-9900: fix false positives on range queries with ReversedWildcardFilterFactory --- solr/CHANGES.txt | 2 ++ .../apache/solr/parser/SolrQueryParserBase.java | 14 ++++++++++++++ .../TestReversedWildcardFilterFactory.java | 12 ++++++++++++ 3 files changed, 28 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b509e236001..7f83de01cf3 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -298,6 +298,8 @@ Bug Fixes * SOLR-9901: Implement move in HdfsDirectoryFactory. (Mark Miller) +* SOLR-9900: fix false positives on range queries with ReversedWildcardFilterFactory (Yonik Seeley via Mikhail Khludnev) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 168bd494fdf..f54e9e98ae4 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.reverse.ReverseStringFilter; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; @@ -894,6 +895,19 @@ public abstract class SolrQueryParserBase extends QueryBuilder { protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError { checkNullField(field); SchemaField sf = schema.getField(field); + + if (part1 == null) { + ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType()); + if (factory != null) { + // There will be reversed tokens starting with u0001 that we want to exclude, so + // lets start at u0002 inclusive instead. + char[] buf = new char[1]; + buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1; + part1 = new String(buf); + startInclusive = true; + } + } + return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive); } diff --git a/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java b/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java index 3ccc352d46d..f7a49ace265 100644 --- a/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java +++ b/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java @@ -182,6 +182,18 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 { assertQ("false positive", req("+id:1 +one:*zemog*"), "//result[@numFound=0]"); + + assertQ("no reverse, no false positive", + req("q", "+id:1 +three:[* TO a]", + "debugQuery", "true"), + "//result[@numFound=0]"); + { + String reverseField = random().nextBoolean() ? 
"one":"two"; + assertQ("false positive", + req("q", "+id:1 +"+reverseField+":[* TO a]", + "debugQuery", "true"), + "//result[@numFound=0]"); + } assertQ("false positive", req("+id:1 +two:*zemog*"), "//result[@numFound=0]"); From d65c02e8cc14f03389c2426ea3d3ddd75e12b1ec Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Thu, 29 Dec 2016 22:56:54 +0100 Subject: [PATCH 61/83] LUCENE-7595: Disable another test not compatible with RamUsageTester --- .../src/test/org/apache/lucene/search/TestLRUQueryCache.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 9ebacf7292a..3acc3ea9b59 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -381,6 +381,8 @@ public class TestLRUQueryCache extends LuceneTestCase { // by the cache itself, not cache entries, and we want to make sure that // memory usage is not grossly underestimated. public void testRamBytesUsedConstantEntryOverhead() throws IOException { + assumeFalse("LUCENE-7595: RamUsageTester does not work exact in Java 9 (estimations for maps and lists)", Constants.JRE_IS_MINIMUM_JAVA9); + final LRUQueryCache queryCache = new LRUQueryCache(1000000, 10000000, context -> true); final RamUsageTester.Accumulator acc = new RamUsageTester.Accumulator() { From cb266d5fc775bd9d26ed7f0e68e9d0d12793f9b5 Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Thu, 29 Dec 2016 17:39:48 -0800 Subject: [PATCH 62/83] SOLR-9891: Add mkroot command to bin/solr and bin/solr.cmd --- solr/CHANGES.txt | 2 + solr/bin/solr | 26 +++++++- solr/bin/solr.cmd | 21 +++++- .../java/org/apache/solr/util/SolrCLI.java | 65 ++++++++++++++++++- 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 7f83de01cf3..59dde90338a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -204,6 +204,8 @@ New Features * SOLR-9905: Add NullStream to isolate the performance of the ExportWriter (Joel Bernstein) +* SOLR-9891: Add mkroot command to bin/solr and bin/solr.cmd (Erick Erickson) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/bin/solr b/solr/bin/solr index c1add261bd0..fcf864b405c 100755 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -493,6 +493,12 @@ function print_usage() { echo "" echo " Only the node names are listed, not data" echo "" + echo " mkroot makes a znode on Zookeeper with no data. Can be used to make a path of arbitrary" + echo " depth but primarily intended to create a 'chroot'." + echo "" + echo " : The Zookeeper path to create. Leading slash is assumed if not present." + echo " Intermediate nodes are created as needed if not present." + echo "" fi } # end print_usage @@ -507,6 +513,7 @@ function print_short_zk_usage() { echo " solr zk rm [-r] [-z zkHost]" echo " solr zk mv [-z zkHost]" echo " solr zk ls [-r] [-z zkHost]" + echo " solr zk mkroot [-z zkHost]" echo "" if [ "$1" == "" ]; then @@ -1001,7 +1008,7 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then if [ $# -gt 0 ]; then while true; do case "$1" in - -upconfig|upconfig|-downconfig|downconfig|cp|rm|mv|ls) + -upconfig|upconfig|-downconfig|downconfig|cp|rm|mv|ls|mkroot) if [ "${1:0:1}" == "-" ]; then ZK_OP=${1:1} else @@ -1042,7 +1049,7 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then shift break ;; - *) # Pick up or params for rm, ls, cp, mv. 
+ *) # Pick up or params for rm, ls, cp, mv, mkroot. if [ "$1" == "" ]; then break # out-of-args, stop looping fi @@ -1062,7 +1069,7 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then fi if [ -z "$ZK_OP" ]; then - print_short_zk_usage "Zookeeper operation (one of 'upconfig', 'downconfig', 'rm', 'mv', 'cp', 'ls') is required!" + print_short_zk_usage "Zookeeper operation (one of 'upconfig', 'downconfig', 'rm', 'mv', 'cp', 'ls', 'mkroot') is required!" fi if [ -z "$ZK_HOST" ]; then @@ -1088,6 +1095,13 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then fi fi + if [[ "$ZK_OP" == "mkroot" ]]; then + if [[ -z "$ZK_SRC" ]]; then + print_short_zk_usage " must be specified when using the 'mkroot' command." + fi + fi + + case "$ZK_OP" in upconfig) run_tool "$ZK_OP" -confname "$CONFIGSET_CONFNAME" -confdir "$CONFIGSET_CONFDIR" -zkHost "$ZK_HOST" -configsetsDir "$SOLR_TIP/server/solr/configsets" @@ -1113,6 +1127,12 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then fi run_tool "$ZK_OP" -path "$ZK_SRC" -recurse "$ZK_RECURSE" -zkHost "$ZK_HOST" ;; + mkroot) + if [ -z "$ZK_SRC" ]; then + print_short_zk_usage "Zookeeper path to list must be specified when using the 'mkroot' command" + fi + run_tool "$ZK_OP" -path "$ZK_SRC" -zkHost "$ZK_HOST" + ;; *) print_short_zk_usage "Unrecognized Zookeeper operation $ZK_OP" ;; diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index 4b6081fd50c..04398bc6754 100644 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -480,6 +480,13 @@ echo ^: The Zookeeper path to use as the root. echo. echo Only the node names are listed, not data echo. +echo mkroot makes a znode in Zookeeper with no data. Can be used to make a path of arbitrary +echo depth but primarily intended to create a 'chroot'." +echo. +echo ^: The Zookeeper path to create. Leading slash is assumed if not present. +echo Intermediate nodes are created as needed if not present. +echo. + goto done :zk_short_usage @@ -492,6 +499,7 @@ echo solr zk cp [-r] ^ ^ [-z zkHost] echo solr zk rm [-r] ^ [-z zkHost] echo solr zk mv ^ ^ [-z zkHost] echo solr zk ls [-r] ^ [-z zkHost] +echo solr zk mkroot ^ [-z zkHost] echo. IF "%ZK_FULL%"=="true" ( goto zk_full_usage @@ -1399,6 +1407,8 @@ IF "%1"=="-upconfig" ( goto set_zk_op ) ELSE IF "%1"=="ls" ( goto set_zk_op +) ELSE IF "%1"=="mkroot" ( + goto set_zk_op ) ELSE IF "%1"=="-n" ( goto set_config_name ) ELSE IF "%1"=="-r" ( @@ -1561,13 +1571,22 @@ IF "!ZK_OP!"=="upconfig" ( org.apache.solr.util.SolrCLI !ZK_OP! -zkHost !ZK_HOST! -path !ZK_SRC! -recurse !ZK_RECURSE! ) ELSE IF "!ZK_OP!"=="ls" ( IF "%ZK_SRC"=="" ( - set ERROR_MSG="Zookeeper path to remove must be specified when using the 'rm' command" + set ERROR_MSG="Zookeeper path to remove must be specified when using the 'ls' command" goto zk_short_usage ) "%JAVA%" %SOLR_SSL_OPTS% %AUTHC_OPTS% %SOLR_ZK_CREDS_AND_ACLS% -Dsolr.install.dir="%SOLR_TIP%" ^ -Dlog4j.configuration="file:%DEFAULT_SERVER_DIR%\scripts\cloud-scripts\log4j.properties" ^ -classpath "%DEFAULT_SERVER_DIR%\solr-webapp\webapp\WEB-INF\lib\*;%DEFAULT_SERVER_DIR%\lib\ext\*" ^ org.apache.solr.util.SolrCLI !ZK_OP! -zkHost !ZK_HOST! -path !ZK_SRC! -recurse !ZK_RECURSE! 
+) ELSE IF "!ZK_OP!"=="mkroot" ( + IF "%ZK_SRC"=="" ( + set ERROR_MSG="Zookeeper path to create must be specified when using the 'mkroot' command" + goto zk_short_usage + ) + "%JAVA%" %SOLR_SSL_OPTS% %AUTHC_OPTS% %SOLR_ZK_CREDS_AND_ACLS% -Dsolr.install.dir="%SOLR_TIP%" ^ + -Dlog4j.configuration="file:%DEFAULT_SERVER_DIR%\scripts\cloud-scripts\log4j.properties" ^ + -classpath "%DEFAULT_SERVER_DIR%\solr-webapp\webapp\WEB-INF\lib\*;%DEFAULT_SERVER_DIR%\lib\ext\*" ^ + org.apache.solr.util.SolrCLI !ZK_OP! -zkHost !ZK_HOST! -path !ZK_SRC! ) ELSE ( set ERROR_MSG="Unknown zk option !ZK_OP!" goto zk_short_usage diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 49798489498..bb2d55405aa 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -364,6 +364,8 @@ public class SolrCLI { return new ZkCpTool(); else if ("ls".equals(toolType)) return new ZkLsTool(); + else if ("mkroot".equals(toolType)) + return new ZkMkrootTool(); else if ("assert".equals(toolType)) return new AssertTool(); else if ("utils".equals(toolType)) @@ -1986,7 +1988,7 @@ public class SolrCLI { if (zkHost == null) { throw new IllegalStateException("Solr at " + cli.getOptionValue("zkHost") + - " is running in standalone server mode, 'zk rm' can only be used when running in SolrCloud mode.\n"); + " is running in standalone server mode, 'zk ls' can only be used when running in SolrCloud mode.\n"); } @@ -1999,12 +2001,71 @@ public class SolrCLI { " recurse: " + Boolean.toString(recurse)); stdout.print(zkClient.listZnode(znode, recurse)); } catch (Exception e) { - log.error("Could not complete rm operation for reason: " + e.getMessage()); + log.error("Could not complete ls operation for reason: " + e.getMessage()); throw (e); } } } // End zkLsTool class + + public static class ZkMkrootTool extends ToolBase { + + public ZkMkrootTool() { + this(System.out); + } + + public ZkMkrootTool(PrintStream stdout) { + super(stdout); + } + + @SuppressWarnings("static-access") + public Option[] getOptions() { + return new Option[]{ + OptionBuilder + .withArgName("path") + .hasArg() + .isRequired(true) + .withDescription("Path to create") + .create("path"), + OptionBuilder + .withArgName("HOST") + .hasArg() + .isRequired(true) + .withDescription("Address of the Zookeeper ensemble; defaults to: " + ZK_HOST) + .create("zkHost") + }; + } + + public String getName() { + return "mkroot"; + } + + protected void runImpl(CommandLine cli) throws Exception { + + String zkHost = getZkHost(cli); + + if (zkHost == null) { + throw new IllegalStateException("Solr at " + cli.getOptionValue("zkHost") + + " is running in standalone server mode, 'zk mkroot' can only be used when running in SolrCloud mode.\n"); + } + + + try (SolrZkClient zkClient = new SolrZkClient(zkHost, 30000)) { + echo("\nConnecting to ZooKeeper at " + zkHost + " ..."); + + String znode = cli.getOptionValue("path"); + echo("Creating Zookeeper path " + znode + " on ZooKeeper at " + zkHost); + zkClient.makePath(znode, true); + } catch (Exception e) { + log.error("Could not complete mkroot operation for reason: " + e.getMessage()); + throw (e); + } + } + } // End zkMkrootTool class + + + + public static class ZkCpTool extends ToolBase { public ZkCpTool() { From 3ccd15a7658ad2821e8a2d2916781265db6f3afe Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Thu, 29 Dec 2016 18:10:34 -0800 Subject: [PATCH 63/83] SOLR-9843 Fix up DocValuesNotIndexedTest failures 
(cherry picked from commit f6a3557) --- .../org/apache/solr/cloud/DocValuesNotIndexedTest.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java index f5257f82865..be9f9a36a5a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java @@ -256,10 +256,6 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { solrQuery.addSort("id", SolrQuery.ORDER.asc); final QueryResponse rsp = client.query(COLLECTION, solrQuery); SolrDocumentList res = rsp.getResults(); - //TODO remove after SOLR-9843 - if (order.length != res.getNumFound()) { - log.error("(3) About to fail, response is: " + rsp.toString()); - } assertEquals("Should have exactly " + order.length + " documents returned", order.length, res.getNumFound()); String expected; for (int idx = 0; idx < res.size(); ++idx) { @@ -305,10 +301,6 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { if (prop.getName().startsWith("bool")) expected = 3; //true, false and null List fieldCommandGroups = fieldCommand.getValues(); - //TODO: remove me since this is excessive in the normal case, this is in for SOLR-9843 - if (expected != fieldCommandGroups.size()) { - log.error("(1) About to fail assert, response is: " + rsp.toString()); - } assertEquals("Did not find the expected number of groups for field " + prop.getName(), expected, fieldCommandGroups.size()); } } @@ -381,8 +373,6 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { break; default: - //TODO remove me after SOLR-9843 - log.error("(2) About to fail, response is: " + rsp.toString()); fail("Unexpected number of elements in the group for " + prop.getName() + ": " + grp.getResult().size()); } } From 26ee8e9bea70e857aa61764020337ce675066bd1 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 30 Dec 2016 10:30:40 +0100 Subject: [PATCH 64/83] LUCENE-7606: Normalization with CustomAnalyzer would only apply the last token filter. --- lucene/CHANGES.txt | 3 +++ .../analysis/custom/CustomAnalyzer.java | 2 +- .../analysis/custom/TestCustomAnalyzer.java | 21 +++++++++++++++++++ .../lucene/analysis/custom/mapping1.txt | 1 + .../lucene/analysis/custom/mapping2.txt | 1 + 5 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 12b615d6ac6..4c49560620a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -129,6 +129,9 @@ Bug Fixes using helpers for exclusive bounds that are consistent with Double.compare. (Adrien Grand, Dawid Weiss) +* LUCENE-7606: Normalization with CustomAnalyzer would only apply the last + token filter. 
(Adrien Grand) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java index 466642c9f37..1cfdfe37979 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java @@ -145,7 +145,7 @@ public final class CustomAnalyzer extends Analyzer { for (TokenFilterFactory filter : tokenFilters) { if (filter instanceof MultiTermAwareComponent) { filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent(); - result = filter.create(in); + result = filter.create(result); } } return result; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java index aa69b709ec9..d929bfd099e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java @@ -30,6 +30,7 @@ import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory; +import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory; import org.apache.lucene.analysis.core.KeywordTokenizerFactory; import org.apache.lucene.analysis.core.LowerCaseFilterFactory; import org.apache.lucene.analysis.core.LowerCaseTokenizer; @@ -479,4 +480,24 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase { assertEquals(new BytesRef("2A"), analyzer2.normalize("dummy", "0À")); } + public void testNormalizationWithMultipleTokenFilters() throws IOException { + CustomAnalyzer analyzer = CustomAnalyzer.builder() + // none of these components are multi-term aware so they should not be applied + .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap()) + .addTokenFilter(LowerCaseFilterFactory.class, Collections.emptyMap()) + .addTokenFilter(ASCIIFoldingFilterFactory.class, Collections.emptyMap()) + .build(); + assertEquals(new BytesRef("a b e"), analyzer.normalize("dummy", "À B é")); + } + + public void testNormalizationWithMultiplCharFilters() throws IOException { + CustomAnalyzer analyzer = CustomAnalyzer.builder() + // none of these components are multi-term aware so they should not be applied + .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap()) + .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping1.txt"))) + .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping2.txt"))) + .build(); + assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c")); + } + } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt new file mode 100644 index 00000000000..40aaf5a27d3 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt @@ -0,0 +1 @@ +"a" => "e" diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt new file mode 100644 index 00000000000..cac0bea0694 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt @@ -0,0 +1 @@ +"b" => "f" From cc862d8e67f32d5447599d265f5d126541ed92c9 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Tue, 27 Dec 2016 15:34:12 +0300 Subject: [PATCH 65/83] SOLR-9668: introduce cursorMark='true' for SolrEntityProcessor --- solr/CHANGES.txt | 2 + .../dataimport/SolrEntityProcessor.java | 198 +++++++++++------- .../dataimport/MockSolrEntityProcessor.java | 18 +- .../TestSolrEntityProcessorEndToEnd.java | 27 ++- .../TestSolrEntityProcessorUnit.java | 70 +++++++ 5 files changed, 237 insertions(+), 78 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 59dde90338a..874ac81db88 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -206,6 +206,8 @@ New Features * SOLR-9891: Add mkroot command to bin/solr and bin/solr.cmd (Erick Erickson) +* SOLR-9668,SOLR-7197: introduce cursorMark='true' in SolrEntityProcessor (Yegor Kozlov, Raveendra Yerraguntl via Mikhail Khludnev) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java index 5e62731879e..6d8726f91dc 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java @@ -16,6 +16,18 @@ */ package org.apache.solr.handler.dataimport; +import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; +import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + import org.apache.http.client.HttpClient; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; @@ -27,22 +39,12 @@ import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CursorMarkParams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - /** *
    * An implementation of {@link EntityProcessor} which fetches values from a @@ -139,81 +141,53 @@ public class SolrEntityProcessor extends EntityProcessorBase { * The following method changes the rowIterator mutable field. It requires * external synchronization. */ - private void buildIterator() { + protected void buildIterator() { if (rowIterator != null) { SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator; if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) { - SolrDocumentList solrDocumentList = doQuery(documentListIterator - .getStart() + documentListIterator.getSize()); - if (solrDocumentList != null) { - rowIterator = new SolrDocumentListIterator(solrDocumentList); - } + nextPage(); } - } else { - SolrDocumentList solrDocumentList = doQuery(0); - if (solrDocumentList != null) { - rowIterator = new SolrDocumentListIterator(solrDocumentList); - } - return; + } else { + Boolean cursor = new Boolean(context + .getResolvedEntityAttribute(CursorMarkParams.CURSOR_MARK_PARAM)); + rowIterator = !cursor ? new SolrDocumentListIterator(new SolrDocumentList()) + : new SolrDocumentListCursor(new SolrDocumentList(), CursorMarkParams.CURSOR_MARK_START); + nextPage(); } } - protected SolrDocumentList doQuery(int start) { - this.queryString = context.getResolvedEntityAttribute(QUERY); - if (this.queryString == null) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, - "SolrEntityProcessor: parameter 'query' is required" - ); - } + protected void nextPage() { + ((SolrDocumentListIterator)rowIterator).doQuery(); + } - String rowsP = context.getResolvedEntityAttribute(CommonParams.ROWS); - if (rowsP != null) { - rows = Integer.parseInt(rowsP); - } - - String fqAsString = context.getResolvedEntityAttribute(CommonParams.FQ); - if (fqAsString != null) { - this.filterQueries = fqAsString.split(","); - } - - String fieldsAsString = context.getResolvedEntityAttribute(CommonParams.FL); - if (fieldsAsString != null) { - this.fields = fieldsAsString.split(","); - } - this.requestHandler = context.getResolvedEntityAttribute(CommonParams.QT); - String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT); - if (timeoutAsString != null) { - this.timeout = Integer.parseInt(timeoutAsString); - } - - SolrQuery solrQuery = new SolrQuery(queryString); - solrQuery.setRows(rows); - solrQuery.setStart(start); - if (fields != null) { - for (String field : fields) { - solrQuery.addField(field); - } - } - solrQuery.setRequestHandler(requestHandler); - solrQuery.setFilterQueries(filterQueries); - solrQuery.setTimeAllowed(timeout * 1000); + class SolrDocumentListCursor extends SolrDocumentListIterator { - QueryResponse response = null; - try { - response = solrClient.query(solrQuery); - } catch (SolrServerException | IOException e) { - if (ABORT.equals(onError)) { - wrapAndThrow(SEVERE, e); - } else if (SKIP.equals(onError)) { - wrapAndThrow(DataImportHandlerException.SKIP_ROW, e); + private final String cursorMark; + + public SolrDocumentListCursor(SolrDocumentList solrDocumentList, String cursorMark) { + super(solrDocumentList); + this.cursorMark = cursorMark; + } + + @Override + protected void passNextPage(SolrQuery solrQuery) { + String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT); + if (timeoutAsString != null) { + throw new DataImportHandlerException(SEVERE,"cursorMark can't be used with timeout"); } + + solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark); } - return response == null ? 
null : response.getResults(); + @Override + protected Iterator> createNextPageIterator(QueryResponse response) { + return + new SolrDocumentListCursor(response.getResults(), + response.getNextCursorMark()) ; + } } - private static class SolrDocumentListIterator implements Iterator> { + class SolrDocumentListIterator implements Iterator> { private final int start; private final int size; @@ -230,6 +204,84 @@ public class SolrEntityProcessor extends EntityProcessorBase { this.size = solrDocumentList.size(); } + protected QueryResponse doQuery() { + SolrEntityProcessor.this.queryString = context.getResolvedEntityAttribute(QUERY); + if (SolrEntityProcessor.this.queryString == null) { + throw new DataImportHandlerException( + DataImportHandlerException.SEVERE, + "SolrEntityProcessor: parameter 'query' is required" + ); + } + + String rowsP = context.getResolvedEntityAttribute(CommonParams.ROWS); + if (rowsP != null) { + rows = Integer.parseInt(rowsP); + } + + String sortParam = context.getResolvedEntityAttribute(CommonParams.SORT); + + String fqAsString = context.getResolvedEntityAttribute(CommonParams.FQ); + if (fqAsString != null) { + SolrEntityProcessor.this.filterQueries = fqAsString.split(","); + } + + String fieldsAsString = context.getResolvedEntityAttribute(CommonParams.FL); + if (fieldsAsString != null) { + SolrEntityProcessor.this.fields = fieldsAsString.split(","); + } + SolrEntityProcessor.this.requestHandler = context.getResolvedEntityAttribute(CommonParams.QT); + + + SolrQuery solrQuery = new SolrQuery(queryString); + solrQuery.setRows(rows); + + if (sortParam!=null) { + solrQuery.setParam(CommonParams.SORT, sortParam); + } + + passNextPage(solrQuery); + + if (fields != null) { + for (String field : fields) { + solrQuery.addField(field); + } + } + solrQuery.setRequestHandler(requestHandler); + solrQuery.setFilterQueries(filterQueries); + + + QueryResponse response = null; + try { + response = solrClient.query(solrQuery); + } catch (SolrServerException | IOException | SolrException e) { + if (ABORT.equals(onError)) { + wrapAndThrow(SEVERE, e); + } else if (SKIP.equals(onError)) { + wrapAndThrow(DataImportHandlerException.SKIP_ROW, e); + } + } + + if (response != null) { + SolrEntityProcessor.this.rowIterator = createNextPageIterator(response); + } + return response; + } + + protected Iterator> createNextPageIterator(QueryResponse response) { + return new SolrDocumentListIterator(response.getResults()); + } + + protected void passNextPage(SolrQuery solrQuery) { + String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT); + if (timeoutAsString != null) { + SolrEntityProcessor.this.timeout = Integer.parseInt(timeoutAsString); + } + + solrQuery.setTimeAllowed(timeout * 1000); + + solrQuery.setStart(getStart() + getSize()); + } + @Override public boolean hasNext() { return solrDocumentIterator.hasNext(); diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockSolrEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockSolrEntityProcessor.java index 4ebca306ed8..42e5f7d3e48 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockSolrEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockSolrEntityProcessor.java @@ -29,16 +29,28 @@ public class MockSolrEntityProcessor extends SolrEntityProcessor { private int queryCount = 0; private int rows; + + private int start = 0; public MockSolrEntityProcessor(List 
docsData, int rows) { this.docsData = docsData; this.rows = rows; } + //@Override + //protected SolrDocumentList doQuery(int start) { + // queryCount++; + // return getDocs(start, rows); + // } + @Override - protected SolrDocumentList doQuery(int start) { - queryCount++; - return getDocs(start, rows); + protected void buildIterator() { + if (rowIterator==null || (!rowIterator.hasNext() && ((SolrDocumentListIterator)rowIterator).hasMoreRows())){ + queryCount++; + SolrDocumentList docs = getDocs(start, rows); + rowIterator = new SolrDocumentListIterator(docs); + start += docs.size(); + } } private SolrDocumentList getDocs(int start, int rows) { diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java index 8ef94c02c76..9e104eeb7a4 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java @@ -34,6 +34,8 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.nio.file.Files; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -179,7 +181,7 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe try { addDocumentsToSolr(generateSolrDocuments(7)); - runFullImport(generateDIHConfig("query='*:*' fl='id' rows='2'", false)); + runFullImport(generateDIHConfig("query='*:*' fl='id' rows='2'"+(random().nextBoolean() ?" cursorMark='true' sort='id asc'":""), false)); } catch (Exception e) { LOG.error(e.getMessage(), e); fail(e.getMessage()); @@ -252,7 +254,8 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe assertQ(req("*:*"), "//result[@numFound='0']"); try { - runFullImport(generateDIHConfig("query='bogus:3' rows='2' fl='id,desc' onError='abort'", false)); + runFullImport(generateDIHConfig("query='bogus:3' rows='2' fl='id,desc' onError='"+ + (random().nextBoolean() ? 
"abort" : "justtogetcoverage")+"'", false)); } catch (Exception e) { LOG.error(e.getMessage(), e); fail(e.getMessage()); @@ -260,7 +263,27 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe assertQ(req("*:*"), "//result[@numFound='0']"); } + + public void testCursorMarkNoSort() throws SolrServerException, IOException { + assertQ(req("*:*"), "//result[@numFound='0']"); + addDocumentsToSolr(generateSolrDocuments(7)); + try { + List errors = Arrays.asList("sort='id'", //wrong sort spec + "", //no sort spec + "sort='id asc' timeout='12345'"); // sort is fine, but set timeout + Collections.shuffle(errors, random()); + String attrs = "query='*:*' rows='2' fl='id,desc' cursorMark='true' " + + errors.get(0); + runFullImport(generateDIHConfig(attrs, + false)); + } catch (Exception e) { + LOG.error(e.getMessage(), e); + fail(e.getMessage()); + } + assertQ(req("*:*"), "//result[@numFound='0']"); + } + private static List> generateSolrDocuments(int num) { List> docList = new ArrayList<>(); for (int i = 1; i <= num; i++) { diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java index a8fcbb18cb5..a2a9fffa9ab 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java @@ -18,11 +18,23 @@ package org.apache.solr.handler.dataimport; import java.util.*; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.handler.dataimport.SolrEntityProcessor.SolrDocumentListIterator; +import org.junit.Test; + /** * Unit test of SolrEntityProcessor. A very basic test outside of the DIH. 
*/ public class TestSolrEntityProcessorUnit extends AbstractDataImportHandlerTestCase { + private static final class NoNextMockProcessor extends SolrEntityProcessor { + @Override + protected void nextPage() { + } + } + private static final String ID = "id"; public void testQuery() { @@ -85,6 +97,64 @@ public class TestSolrEntityProcessorUnit extends AbstractDataImportHandlerTestCa processor.destroy(); } } + @Test (expected = DataImportHandlerException.class) + public void testNoQuery() { + SolrEntityProcessor processor = new SolrEntityProcessor(); + + HashMap entityAttrs = new HashMap(){{put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no");}}; + processor.init(getContext(null, null, null, null, Collections.emptyList(), + entityAttrs)); + try { + processor.buildIterator(); + }finally { + processor.destroy(); + } + } + + public void testPagingQuery() { + SolrEntityProcessor processor = new NoNextMockProcessor() ; + + HashMap entityAttrs = new HashMap(){{ + put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no"); + if (random().nextBoolean()) { + List noCursor = Arrays.asList("","false",CursorMarkParams.CURSOR_MARK_START);//only 'true' not '*' + Collections.shuffle(noCursor, random()); + put(CursorMarkParams.CURSOR_MARK_PARAM, noCursor.get(0)); + }}}; + processor.init(getContext(null, null, null, null, Collections.emptyList(), + entityAttrs)); + try { + processor.buildIterator(); + SolrQuery query = new SolrQuery(); + ((SolrDocumentListIterator) processor.rowIterator).passNextPage(query); + assertEquals("0", query.get(CommonParams.START)); + assertNull( query.get(CursorMarkParams.CURSOR_MARK_PARAM)); + assertNotNull( query.get(CommonParams.TIME_ALLOWED)); + }finally { + processor.destroy(); + } + } + + public void testCursorQuery() { + SolrEntityProcessor processor = new NoNextMockProcessor() ; + + HashMap entityAttrs = new HashMap(){{ + put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no"); + put(CursorMarkParams.CURSOR_MARK_PARAM,"true"); + }}; + processor.init(getContext(null, null, null, null, Collections.emptyList(), + entityAttrs)); + try { + processor.buildIterator(); + SolrQuery query = new SolrQuery(); + ((SolrDocumentListIterator) processor.rowIterator).passNextPage(query); + assertNull(query.get(CommonParams.START)); + assertEquals(CursorMarkParams.CURSOR_MARK_START, query.get(CursorMarkParams.CURSOR_MARK_PARAM)); + assertNull( query.get(CommonParams.TIME_ALLOWED)); + }finally { + processor.destroy(); + } + } private List generateUniqueDocs(int numDocs) { List types = new ArrayList<>(); From f3fe487970f1e21300bd556d226461a2d51b00f9 Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Fri, 30 Dec 2016 14:34:00 -0500 Subject: [PATCH 66/83] SOLR-9684: Add schedule Streaming Expression --- .../apache/solr/handler/StreamHandler.java | 1 + .../solrj/io/stream/SchedulerStream.java | 161 ++++++++++++++++++ .../solrj/io/stream/StreamExpressionTest.java | 149 ++++++++++++++++ 3 files changed, 311 insertions(+) create mode 100644 solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index 13ce6365457..1610fead6fc 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -140,6 +140,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("fetch", FetchStream.class) 
.withFunctionName("executor", ExecutorStream.class) .withFunctionName("null", NullStream.class) + .withFunctionName("schedule", SchedulerStream.class) // metrics .withFunctionName("min", MinMetric.class) .withFunctionName("max", MaxMetric.class) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java new file mode 100644 index 00000000000..f8506b9164a --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.client.solrj.io.stream; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.comp.StreamComparator; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The scheduler wraps two topics that represent high priority and low priority task queues. + * Each time the scheduler is called it will check to see if there are any high priority tasks in the queue. If there + * are high priority tasks, then the high priority queue will be read until it returns the EOF Tuple. + * + * If there are no tasks in the high priority queue, then the lower priority task queue will be opened and read until the EOF Tuple is + * returned. + * + * The scheduler is designed to be wrapped by the executor function and a daemon function can be used to call the executor iteratively. 
+ **/ + +public class SchedulerStream extends TupleStream implements Expressible { + + private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private PushBackStream highPriorityTasks; + private PushBackStream tasks; + private TupleStream currentStream; + + public SchedulerStream(StreamExpression expression, StreamFactory factory) throws IOException { + // grab all parameters out + List streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class); + + + if(2 != streamExpressions.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a single stream but found %d",expression, streamExpressions.size())); + } + + TupleStream stream1 = factory.constructStream(streamExpressions.get(0)); + TupleStream stream2 = factory.constructStream(streamExpressions.get(1)); + + if(!(stream1 instanceof TopicStream) || !(stream2 instanceof TopicStream)) { + throw new IOException("The scheduler expects both stream parameters to be topics."); + } + + init(new PushBackStream(stream1), new PushBackStream(stream2)); + } + + private void init(PushBackStream stream1, PushBackStream stream2) throws IOException{ + this.highPriorityTasks = stream1; + this.tasks = stream2; + } + + @Override + public StreamExpression toExpression(StreamFactory factory) throws IOException { + return toExpression(factory, true); + } + + private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException { + + // function name + StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass())); + + // stream + if(includeStreams) { + if (highPriorityTasks instanceof Expressible) { + expression.addParameter(((Expressible) highPriorityTasks).toExpression(factory)); + } else { + throw new IOException("The SchedulerStream contains a non-expressible TupleStream - it cannot be converted to an expression"); + } + + if (tasks instanceof Expressible) { + expression.addParameter(((Expressible) tasks).toExpression(factory)); + } else { + throw new IOException("The SchedulerStream contains a non-expressible TupleStream - it cannot be converted to an expression"); + } + } + + return expression; + } + + @Override + public Explanation toExplanation(StreamFactory factory) throws IOException { + + return new StreamExplanation(getStreamNodeId().toString()) + .withChildren(new Explanation[]{ + highPriorityTasks.toExplanation(factory), tasks.toExplanation(factory) + }) + .withFunctionName(factory.getFunctionName(this.getClass())) + .withImplementingClass(this.getClass().getName()) + .withExpressionType(ExpressionType.STREAM_DECORATOR) + .withExpression(toExpression(factory, false).toString()); + } + + public void setStreamContext(StreamContext streamContext) { + this.highPriorityTasks.setStreamContext(streamContext); + tasks.setStreamContext(streamContext); + } + + public List children() { + List l = new ArrayList(); + l.add(highPriorityTasks); + l.add(tasks); + return l; + } + + public void open() throws IOException { + highPriorityTasks.open(); + Tuple tuple = highPriorityTasks.read(); + if(tuple.EOF) { + highPriorityTasks.close(); + tasks.open(); + currentStream = tasks; + } else { + highPriorityTasks.pushBack(tuple); + currentStream = highPriorityTasks; + } + } + + public void close() throws IOException { + currentStream.close(); + } + + public Tuple read() throws IOException { + return currentStream.read(); + } + + public StreamComparator getStreamSort(){ 
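+    // No sort order is declared: tuples are returned in whatever order the currently drained topic emits them.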
+ return null; + } + + public int getCost() { + return 0; + } +} \ No newline at end of file diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index 0c9d5b3aadf..3bfe12996ae 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -2815,6 +2815,155 @@ public class StreamExpressionTest extends SolrCloudTestCase { } } + @Test + public void testSchedulerStream() throws Exception { + Assume.assumeTrue(!useAlias); + + new UpdateRequest() + .add(id, "0", "a_s", "hello1", "a_i", "0", "a_f", "1") + .add(id, "2", "a_s", "hello1", "a_i", "2", "a_f", "2") + .add(id, "3", "a_s", "hello1", "a_i", "3", "a_f", "3") + .add(id, "4", "a_s", "hello1", "a_i", "4", "a_f", "4") + .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "5") + .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6") + .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7") + .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8") + .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9") + .add(id, "9", "a_s", "hello1", "a_i", "14", "a_f", "10") + .commit(cluster.getSolrClient(), COLLECTIONORALIAS); + + StreamFactory factory = new StreamFactory() + .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) + .withFunctionName("topic", TopicStream.class) + .withFunctionName("schedule", SchedulerStream.class); + + StreamExpression expression; + TupleStream stream; + List tuples; + + SolrClientCache cache = new SolrClientCache(); + + try { + FieldComparator comp = new FieldComparator("a_i", ComparatorOrder.ASCENDING); + + expression = StreamExpressionParser.parse("schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); + stream = factory.constructStream(expression); + StreamContext context = new StreamContext(); + context.setSolrClientCache(cache); + stream.setStreamContext(context); + tuples = getTuples(stream); + + Collections.sort(tuples, comp); + //The tuples from the first topic (high priority) should be returned. + + assertEquals(tuples.size(), 4); + assertOrder(tuples, 5, 6, 7, 8); + + expression = StreamExpressionParser.parse("schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); + stream = factory.constructStream(expression); + context = new StreamContext(); + context.setSolrClientCache(cache); + stream.setStreamContext(context); + tuples = getTuples(stream); + Collections.sort(tuples, comp); + + //The Tuples from the second topic (Low priority) should be returned. + assertEquals(tuples.size(), 6); + assertOrder(tuples, 0, 1, 2, 3, 4, 9); + + expression = StreamExpressionParser.parse("schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); + stream = factory.constructStream(expression); + context = new StreamContext(); + context.setSolrClientCache(cache); + stream.setStreamContext(context); + tuples = getTuples(stream); + + //Both queus are empty. 
+ assertEquals(tuples.size(), 0); + + } finally { + cache.close(); + } + } + + @Test + public void testParallelSchedulerStream() throws Exception { + Assume.assumeTrue(!useAlias); + + new UpdateRequest() + .add(id, "0", "a_s", "hello1", "a_i", "0", "a_f", "1") + .add(id, "2", "a_s", "hello1", "a_i", "2", "a_f", "2") + .add(id, "3", "a_s", "hello1", "a_i", "3", "a_f", "3") + .add(id, "4", "a_s", "hello1", "a_i", "4", "a_f", "4") + .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "5") + .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6") + .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7") + .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8") + .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9") + .add(id, "9", "a_s", "hello1", "a_i", "14", "a_f", "10") + .commit(cluster.getSolrClient(), COLLECTIONORALIAS); + + StreamFactory factory = new StreamFactory() + .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) + .withFunctionName("topic", TopicStream.class) + .withFunctionName("parallel", ParallelStream.class) + .withFunctionName("schedule", SchedulerStream.class); + + StreamExpression expression; + TupleStream stream; + List tuples; + + SolrClientCache cache = new SolrClientCache(); + + try { + FieldComparator comp = new FieldComparator("a_i", ComparatorOrder.ASCENDING); + + expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); + stream = factory.constructStream(expression); + StreamContext context = new StreamContext(); + context.setSolrClientCache(cache); + stream.setStreamContext(context); + tuples = getTuples(stream); + + Collections.sort(tuples, comp); + //The tuples from the first topic (high priority) should be returned. + + assertEquals(tuples.size(), 4); + assertOrder(tuples, 5, 6, 7, 8); + + expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); + stream = factory.constructStream(expression); + context = new StreamContext(); + context.setSolrClientCache(cache); + stream.setStreamContext(context); + tuples = getTuples(stream); + Collections.sort(tuples, comp); + + //The Tuples from the second topic (Low priority) should be returned. + assertEquals(tuples.size(), 6); + assertOrder(tuples, 0, 1, 2, 3, 4, 9); + + expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); + stream = factory.constructStream(expression); + context = new StreamContext(); + context.setSolrClientCache(cache); + stream.setStreamContext(context); + tuples = getTuples(stream); + + //Both queus are empty. 
+ assertEquals(tuples.size(), 0); + + } finally { + cache.close(); + } + } + @Test public void testParallelTopicStream() throws Exception { From 36a691c50d680d1c6977e6185448e06cb21f653d Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Fri, 30 Dec 2016 15:38:28 -0500 Subject: [PATCH 67/83] SOLR-9684: Update CHANGES.txt --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 874ac81db88..6a99617c01a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -208,6 +208,8 @@ New Features * SOLR-9668,SOLR-7197: introduce cursorMark='true' in SolrEntityProcessor (Yegor Kozlov, Raveendra Yerraguntl via Mikhail Khludnev) +* SOLR-9684: Add schedule Streaming Expression (Joel Bernstein) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have From 61676188d7f592f697933b6051806c0bc55b406a Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Fri, 30 Dec 2016 19:54:00 -0500 Subject: [PATCH 68/83] SOLR-9495: AIOBE with confusing message for incomplete sort spec in Streaming Expression --- .../solr/client/solrj/io/stream/CloudSolrStream.java | 4 ++++ .../client/solrj/io/stream/StreamExpressionTest.java | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java index 0580122bf38..f177585e2a3 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java @@ -332,6 +332,10 @@ public class CloudSolrStream extends TupleStream implements Expressible { String[] spec = s.trim().split("\\s+"); //This should take into account spaces in the sort spec. 
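+      // An incomplete spec such as sort="a_f" (a field with no asc/desc direction) previously
+      // surfaced as an ArrayIndexOutOfBoundsException below; reject it here with a clear error instead.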
+ if (spec.length != 2) { + throw new IOException("Invalid sort spec:" + s); + } + String fieldName = spec[0].trim(); String order = spec[1].trim(); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index 3bfe12996ae..936d42fe34f 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -162,6 +162,16 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertTrue(e.getMessage().contains("fl param expected for search function")); } + + try { + expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", fl=\"id, a_f\", sort=\"a_f\")"); + stream = new CloudSolrStream(expression, factory); + tuples = getTuples(stream); + throw new Exception("Should be an exception here"); + } catch(Exception e) { + assertTrue(e.getMessage().contains("Invalid sort spec")); + } + } @Test From 832d02bf494c8fea02398db31b55de4314f2be8a Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Fri, 30 Dec 2016 20:39:57 -0500 Subject: [PATCH 69/83] SOLR-9495: Update CHANGES.txt --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 6a99617c01a..02167f31707 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -306,6 +306,8 @@ Bug Fixes * SOLR-9900: fix false positives on range queries with ReversedWildcardFilterFactory (Yonik Seeley via Mikhail Khludnev) +* SOLR-9495: AIOBE with confusing message for incomplete sort spec in Streaming Expression (Gus Heck, Joel Bernstein) + Other Changes ---------------------- From fb2800b1497a67493c7f8944bda22f590bb9cc6b Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Sat, 31 Dec 2016 20:46:32 +0100 Subject: [PATCH 70/83] SOLR-9880 Fix concurrency bugs in tests. 
--- .../apache/solr/metrics/reporters/SolrGangliaReporterTest.java | 3 ++- .../solr/metrics/reporters/SolrGraphiteReporterTest.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java index b5b0f858395..d9675efd5dd 100644 --- a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java @@ -73,7 +73,8 @@ public class SolrGangliaReporterTest extends SolrTestCaseJ4 { gangliaReporter.start(); Thread.sleep(5000); assertTrue(names.size() >= 3); - for (String name : names) { + String[] frozenNames = (String[])names.toArray(new String[names.size()]); + for (String name : frozenNames) { assertTrue(name, name.startsWith("test.solr.node.cores.")); } } diff --git a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java index 6773e0ca0e3..3d1c48218d7 100644 --- a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGraphiteReporterTest.java @@ -69,7 +69,8 @@ public class SolrGraphiteReporterTest extends SolrTestCaseJ4 { assertTrue(reporter instanceof SolrGraphiteReporter); Thread.sleep(5000); assertTrue(mock.lines.size() >= 3); - for (String line : mock.lines) { + String[] frozenLines = (String[])mock.lines.toArray(new String[mock.lines.size()]); + for (String line : frozenLines) { assertTrue(line, line.startsWith("test.solr.node.cores.")); } } finally { From 93562da610bf8756351be7720c69872bc1cea727 Mon Sep 17 00:00:00 2001 From: anshum Date: Sun, 1 Jan 2017 15:31:02 -0800 Subject: [PATCH 71/83] SOLR-9154: Fix DirectSolrSpellChecker to work when added through the Config API --- solr/CHANGES.txt | 4 +++- .../solr/spelling/DirectSolrSpellChecker.java | 18 +++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 02167f31707..71336380d01 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -299,7 +299,7 @@ Bug Fixes * SOLR-9699,SOLR-4668: fix exception from core status in parallel with core reload (Mikhail Khludnev) -* SOLR-9859: replication.properties cannot be updated after being written and neither replication.properties or +* SOLR-9859: replication.properties cannot be updated after being written and neither replication.properties or index.properties are durable in the face of a crash. (Pushkar Raste, Chris de Kok, Cao Manh Dat, Mark Miller) * SOLR-9901: Implement move in HdfsDirectoryFactory. (Mark Miller) @@ -308,6 +308,8 @@ Bug Fixes * SOLR-9495: AIOBE with confusing message for incomplete sort spec in Streaming Expression (Gus Heck, Joel Bernstein) +* SOLR-9154: Fix DirectSolrSpellChecker to work when added through the Config API. 
(Anshum Gupta) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java index bbde74ae9f4..15fee72c3b2 100644 --- a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.spell.StringDistance; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordQueue; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; @@ -93,6 +94,9 @@ public class DirectSolrSpellChecker extends SolrSpellChecker { @Override public String init(NamedList config, SolrCore core) { + + SolrParams params = SolrParams.toSolrParams(config); + LOG.info("init: " + config); String name = super.init(config, core); @@ -113,37 +117,37 @@ public class DirectSolrSpellChecker extends SolrSpellChecker { sd = core.getResourceLoader().newInstance(distClass, StringDistance.class); float minAccuracy = DEFAULT_ACCURACY; - Float accuracy = (Float) config.get(ACCURACY); + Float accuracy = params.getFloat(ACCURACY); if (accuracy != null) minAccuracy = accuracy; int maxEdits = DEFAULT_MAXEDITS; - Integer edits = (Integer) config.get(MAXEDITS); + Integer edits = params.getInt(MAXEDITS); if (edits != null) maxEdits = edits; int minPrefix = DEFAULT_MINPREFIX; - Integer prefix = (Integer) config.get(MINPREFIX); + Integer prefix = params.getInt(MINPREFIX); if (prefix != null) minPrefix = prefix; int maxInspections = DEFAULT_MAXINSPECTIONS; - Integer inspections = (Integer) config.get(MAXINSPECTIONS); + Integer inspections = params.getInt(MAXINSPECTIONS); if (inspections != null) maxInspections = inspections; float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY; - Float threshold = (Float) config.get(THRESHOLD_TOKEN_FREQUENCY); + Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY); if (threshold != null) minThreshold = threshold; int minQueryLength = DEFAULT_MINQUERYLENGTH; - Integer queryLength = (Integer) config.get(MINQUERYLENGTH); + Integer queryLength = params.getInt(MINQUERYLENGTH); if (queryLength != null) minQueryLength = queryLength; float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY; - Float queryFreq = (Float) config.get(MAXQUERYFREQUENCY); + Float queryFreq = params.getFloat(MAXQUERYFREQUENCY); if (queryFreq != null) maxQueryFrequency = queryFreq; From 0999f6779a3341af072d31162a2c88cf1eb8c5d4 Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Mon, 2 Jan 2017 11:08:44 -0500 Subject: [PATCH 72/83] SOLR-9684: Rename schedule function to priority --- solr/CHANGES.txt | 2 +- .../apache/solr/handler/StreamHandler.java | 2 +- ...hedulerStream.java => PriorityStream.java} | 8 ++++---- .../solrj/io/stream/StreamExpressionTest.java | 20 +++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) rename solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/{SchedulerStream.java => PriorityStream.java} (93%) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 71336380d01..6326e547113 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -208,7 +208,7 @@ New Features * SOLR-9668,SOLR-7197: introduce cursorMark='true' in SolrEntityProcessor (Yegor Kozlov, Raveendra Yerraguntl via Mikhail Khludnev) 
-* SOLR-9684: Add schedule Streaming Expression (Joel Bernstein) +* SOLR-9684: Add priority Streaming Expression (Joel Bernstein, David Smiley) Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index 1610fead6fc..661704f401e 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -140,7 +140,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("fetch", FetchStream.class) .withFunctionName("executor", ExecutorStream.class) .withFunctionName("null", NullStream.class) - .withFunctionName("schedule", SchedulerStream.class) + .withFunctionName("priority", PriorityStream.class) // metrics .withFunctionName("min", MinMetric.class) .withFunctionName("max", MaxMetric.class) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PriorityStream.java similarity index 93% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PriorityStream.java index f8506b9164a..c5faf41162b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SchedulerStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PriorityStream.java @@ -35,8 +35,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * The scheduler wraps two topics that represent high priority and low priority task queues. - * Each time the scheduler is called it will check to see if there are any high priority tasks in the queue. If there + * The priority function wraps two topics that represent high priority and low priority task queues. + * Each time the priority function is called it will check to see if there are any high priority tasks in the queue. If there * are high priority tasks, then the high priority queue will be read until it returns the EOF Tuple. * * If there are no tasks in the high priority queue, then the lower priority task queue will be opened and read until the EOF Tuple is @@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory; * The scheduler is designed to be wrapped by the executor function and a daemon function can be used to call the executor iteratively. 
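For reference alongside the rename above, a sketch of how the function is written in a streaming expression (collection name, queries, and ids are illustrative; the executor/daemon wrapping described in this javadoc is omitted, and the assumption that the first wrapped topic is the high-priority queue follows the tests further below):

class PriorityExpressionSketch {
  // Per the javadoc above: when the high-priority topic has pending tuples,
  // only it is read; the low-priority topic is read only when the first is empty.
  static final String EXPR =
      "priority(topic(tasks, tasks, q=\"priority_s:high\", fl=\"id,task_s\", id=1000000, initialCheckpoint=0),"
          + "topic(tasks, tasks, q=\"priority_s:low\", fl=\"id,task_s\", id=2000000, initialCheckpoint=0))";
}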
**/ -public class SchedulerStream extends TupleStream implements Expressible { +public class PriorityStream extends TupleStream implements Expressible { private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -53,7 +53,7 @@ public class SchedulerStream extends TupleStream implements Expressible { private PushBackStream tasks; private TupleStream currentStream; - public SchedulerStream(StreamExpression expression, StreamFactory factory) throws IOException { + public PriorityStream(StreamExpression expression, StreamFactory factory) throws IOException { // grab all parameters out List streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index 936d42fe34f..1316af4fc53 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -2826,7 +2826,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { } @Test - public void testSchedulerStream() throws Exception { + public void testPriorityStream() throws Exception { Assume.assumeTrue(!useAlias); new UpdateRequest() @@ -2845,7 +2845,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { StreamFactory factory = new StreamFactory() .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) - .withFunctionName("schedule", SchedulerStream.class); + .withFunctionName("priority", PriorityStream.class); StreamExpression expression; TupleStream stream; @@ -2856,7 +2856,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { try { FieldComparator comp = new FieldComparator("a_i", ComparatorOrder.ASCENDING); - expression = StreamExpressionParser.parse("schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + + expression = StreamExpressionParser.parse("priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); stream = factory.constructStream(expression); StreamContext context = new StreamContext(); @@ -2870,7 +2870,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertEquals(tuples.size(), 4); assertOrder(tuples, 5, 6, 7, 8); - expression = StreamExpressionParser.parse("schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + + expression = StreamExpressionParser.parse("priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); stream = factory.constructStream(expression); context = new StreamContext(); @@ -2883,7 +2883,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertEquals(tuples.size(), 6); assertOrder(tuples, 0, 1, 2, 3, 4, 9); - expression = StreamExpressionParser.parse("schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + + expression = StreamExpressionParser.parse("priority(topic(collection1, collection1, q=\"a_s:hello\", 
fl=\"id,a_i\", id=1000000, initialCheckpoint=0)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0))"); stream = factory.constructStream(expression); context = new StreamContext(); @@ -2900,7 +2900,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { } @Test - public void testParallelSchedulerStream() throws Exception { + public void testParallelPriorityStream() throws Exception { Assume.assumeTrue(!useAlias); new UpdateRequest() @@ -2920,7 +2920,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) .withFunctionName("topic", TopicStream.class) .withFunctionName("parallel", ParallelStream.class) - .withFunctionName("schedule", SchedulerStream.class); + .withFunctionName("priority", PriorityStream.class); StreamExpression expression; TupleStream stream; @@ -2931,7 +2931,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { try { FieldComparator comp = new FieldComparator("a_i", ComparatorOrder.ASCENDING); - expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + + expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); stream = factory.constructStream(expression); StreamContext context = new StreamContext(); @@ -2945,7 +2945,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertEquals(tuples.size(), 4); assertOrder(tuples, 5, 6, 7, 8); - expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + + expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); stream = factory.constructStream(expression); context = new StreamContext(); @@ -2958,7 +2958,7 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertEquals(tuples.size(), 6); assertOrder(tuples, 0, 1, 2, 3, 4, 9); - expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", schedule(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + + expression = StreamExpressionParser.parse("parallel(collection1, workers=2, sort=\"_version_ asc\", priority(topic(collection1, collection1, q=\"a_s:hello\", fl=\"id,a_i\", id=1000000, initialCheckpoint=0, partitionKeys=id)," + "topic(collection1, collection1, q=\"a_s:hello1\", fl=\"id,a_i\", id=2000000, initialCheckpoint=0, partitionKeys=id)))"); stream = factory.constructStream(expression); context = new StreamContext(); From 3988532d26a50b1f3cf51e1d0009a0754cfd6b57 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Tue, 3 Jan 2017 17:52:08 +1030 Subject: [PATCH 73/83] SOLR-9906-Use better check 
to validate if node recovered via PeerSync or Replication --- .../java/org/apache/solr/util/TimeOut.java | 13 +++- .../LeaderFailureAfterFreshStartTest.java | 68 +++++++++++-------- .../solr/cloud/PeerSyncReplicationTest.java | 55 +++++++-------- .../solr/cloud/AbstractDistribZkTestBase.java | 32 +++++---- 4 files changed, 89 insertions(+), 79 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/util/TimeOut.java b/solr/core/src/java/org/apache/solr/util/TimeOut.java index f823b7e3adf..fd91045be11 100644 --- a/solr/core/src/java/org/apache/solr/util/TimeOut.java +++ b/solr/core/src/java/org/apache/solr/util/TimeOut.java @@ -18,12 +18,15 @@ package org.apache.solr.util; import java.util.concurrent.TimeUnit; +import static java.util.concurrent.TimeUnit.NANOSECONDS; + public class TimeOut { - private final long timeoutAt; + private final long timeoutAt, startTime; public TimeOut(long interval, TimeUnit unit) { - this.timeoutAt = System.nanoTime() + TimeUnit.NANOSECONDS.convert(interval, unit); + startTime = System.nanoTime(); + this.timeoutAt = startTime + NANOSECONDS.convert(interval, unit); } public boolean hasTimedOut() { @@ -31,6 +34,10 @@ public class TimeOut { } public long timeLeft(TimeUnit unit) { - return unit.convert(timeoutAt - System.nanoTime(), TimeUnit.NANOSECONDS); + return unit.convert(timeoutAt - System.nanoTime(), NANOSECONDS); + } + + public long timeElapsed(TimeUnit unit) { + return unit.convert(System.nanoTime() - startTime, NANOSECONDS); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderFailureAfterFreshStartTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderFailureAfterFreshStartTest.java index ef213860c63..77dd6b6cbde 100644 --- a/solr/core/src/test/org/apache/solr/cloud/LeaderFailureAfterFreshStartTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/LeaderFailureAfterFreshStartTest.java @@ -19,6 +19,8 @@ package org.apache.solr.cloud; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -26,6 +28,7 @@ import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang.RandomStringUtils; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrServerException; @@ -38,12 +41,13 @@ import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.handler.ReplicationHandler; +import org.apache.solr.util.TimeOut; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static java.util.Collections.singletonList; +import static java.util.concurrent.TimeUnit.SECONDS; /** * @@ -127,52 +131,53 @@ public class LeaderFailureAfterFreshStartTest extends AbstractFullDistribZkTestB waitForThingsToLevelOut(30); checkShardConsistency(false, true); + + // bring down the other node and index a few docs; so the leader and other node segments diverge + forceNodeFailures(singletonList(secondNode)); + for (int i = 0; i < 10; i++) { + indexDoc(id, docId, i1, 50, tlong, 50, t1, + "document number " + docId++); + if(i % 2 == 0) { + commit(); + } + } + commit(); + restartNodes(singletonList(secondNode)); // start the freshNode - ChaosMonkey.start(freshNode.jetty); - nodesDown.remove(freshNode); 
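The TimeOut additions above (a recorded start time plus timeElapsed) support the reporting done by the reworked waitForNewLeader further below. A minimal, hypothetical sketch of that polling pattern (the condition supplier and message are illustrative, not from the patch):

import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;
import org.apache.solr.util.TimeOut;

class WaitLoopSketch {
  // Poll until the condition holds or the TimeOut expires, then report how long
  // the wait actually took via the new timeElapsed() accessor.
  static void waitFor(BooleanSupplier condition) throws InterruptedException {
    TimeOut timeOut = new TimeOut(15, TimeUnit.SECONDS);
    while (!condition.getAsBoolean()) {
      if (timeOut.hasTimedOut()) {
        throw new IllegalStateException("condition not met after " + timeOut.timeElapsed(TimeUnit.MILLISECONDS) + " ms");
      }
      Thread.sleep(100);
    }
  }
}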
- - waitTillNodesActive(); - waitForThingsToLevelOut(30); - - //TODO check how to see if fresh node went into recovery (may be check count for replication handler on new leader) - - long numRequestsBefore = (Long) secondNode.jetty - .getCoreContainer() - .getCores() - .iterator() - .next() - .getRequestHandler(ReplicationHandler.PATH) - .getStatistics().get("requests"); + restartNodes(singletonList(freshNode)); + String replicationProperties = (String) freshNode.jetty.getSolrHome() + "/cores/" + DEFAULT_TEST_COLLECTION_NAME + "/data/replication.properties"; + String md5 = DigestUtils.md5Hex(Files.readAllBytes(Paths.get(replicationProperties))); + // shutdown the original leader log.info("Now shutting down initial leader"); forceNodeFailures(singletonList(initialLeaderJetty)); - waitForNewLeader(cloudClient, "shard1", (Replica)initialLeaderJetty.client.info , 15); + waitForNewLeader(cloudClient, "shard1", (Replica)initialLeaderJetty.client.info , new TimeOut(15, SECONDS)); + waitTillNodesActive(); log.info("Updating mappings from zk"); updateMappingsFromZk(jettys, clients, true); - - long numRequestsAfter = (Long) secondNode.jetty - .getCoreContainer() - .getCores() - .iterator() - .next() - .getRequestHandler(ReplicationHandler.PATH) - .getStatistics().get("requests"); - - assertEquals("Node went into replication", numRequestsBefore, numRequestsAfter); + assertEquals("Node went into replication", md5, DigestUtils.md5Hex(Files.readAllBytes(Paths.get(replicationProperties)))); success = true; } finally { System.clearProperty("solr.disableFingerprint"); } } + + private void restartNodes(List nodesToRestart) throws Exception { + for (CloudJettyRunner node : nodesToRestart) { + chaosMonkey.start(node.jetty); + nodesDown.remove(node); + } + waitTillNodesActive(); + checkShardConsistency(false, true); + } private void forceNodeFailures(List replicasToShutDown) throws Exception { for (CloudJettyRunner replicaToShutDown : replicasToShutDown) { chaosMonkey.killJetty(replicaToShutDown); - waitForNoShardInconsistency(); } int totalDown = 0; @@ -205,8 +210,13 @@ public class LeaderFailureAfterFreshStartTest extends AbstractFullDistribZkTestB Collection replicas = slice.getReplicas(); boolean allActive = true; + Collection nodesDownNames = nodesDown.stream() + .map(n -> n.coreNodeName) + .collect(Collectors.toList()); + Collection replicasToCheck = null; - replicasToCheck = replicas.stream().filter(r -> nodesDown.contains(r.getName())) + replicasToCheck = replicas.stream() + .filter(r -> !nodesDownNames.contains(r.getName())) .collect(Collectors.toList()); for (Replica replica : replicasToCheck) { diff --git a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java index e00ea3c4811..4084ad796ab 100644 --- a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java @@ -20,6 +20,8 @@ package org.apache.solr.cloud; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -41,15 +43,16 @@ import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.handler.ReplicationHandler; +import org.apache.solr.util.TimeOut; import 
org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static java.util.Collections.singletonList; +import static java.util.concurrent.TimeUnit.SECONDS; /** - * Test sync peer sync when a node restarts and documents are indexed when node was down. + * Test PeerSync when a node restarts and documents are indexed when node was down. * * This test is modeled after SyncSliceTest */ @@ -149,12 +152,12 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { log.info("Now shutting down initial leader"); forceNodeFailures(singletonList(initialLeaderJetty)); log.info("Updating mappings from zk"); - waitForNewLeader(cloudClient, "shard1", (Replica) initialLeaderJetty.client.info, 15); + waitForNewLeader(cloudClient, "shard1", (Replica) initialLeaderJetty.client.info, new TimeOut(15, SECONDS)); updateMappingsFromZk(jettys, clients, true); assertEquals("PeerSynced node did not become leader", nodePeerSynced, shardToLeaderJetty.get("shard1")); // bring up node that was down all along, and let it PeerSync from the node that was forced to PeerSynce - bringUpDeadNodeAndEnsureNoReplication(shardToLeaderJetty.get("shard1"), neverLeader, false); + bringUpDeadNodeAndEnsureNoReplication(neverLeader, false); waitTillNodesActive(); checkShardConsistency(false, true); @@ -199,7 +202,6 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { private void forceNodeFailures(List replicasToShutDown) throws Exception { for (CloudJettyRunner replicaToShutDown : replicasToShutDown) { chaosMonkey.killJetty(replicaToShutDown); - waitForNoShardInconsistency(); } int totalDown = 0; @@ -218,8 +220,6 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { assertEquals(getShardCount() - totalDown, jetties.size()); nodesDown.addAll(replicasToShutDown); - - Thread.sleep(3000); } @@ -241,26 +241,17 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { "document number " + docId++); commit(); - bringUpDeadNodeAndEnsureNoReplication(leaderJetty, replicaToShutDown, disableFingerprint); + bringUpDeadNodeAndEnsureNoReplication(replicaToShutDown, disableFingerprint); return replicaToShutDown; } - - - private void bringUpDeadNodeAndEnsureNoReplication(CloudJettyRunner leaderJetty, CloudJettyRunner nodeToBringUp, - boolean disableFingerprint) throws Exception { + + private void bringUpDeadNodeAndEnsureNoReplication(CloudJettyRunner nodeToBringUp, boolean disableFingerprint) + throws Exception { // disable fingerprint check if needed System.setProperty("solr.disableFingerprint", String.valueOf(disableFingerprint)); - long numRequestsBefore = (Long) leaderJetty.jetty - .getCoreContainer() - .getCores() - .iterator() - .next() - .getRequestHandler(ReplicationHandler.PATH) - .getStatistics().get("requests"); - indexInBackground(50); // bring back dead node and ensure it recovers @@ -279,15 +270,9 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound(); assertEquals(docId, cloudClientDocs); - long numRequestsAfter = (Long) leaderJetty.jetty - .getCoreContainer() - .getCores() - .iterator() - .next() - .getRequestHandler(ReplicationHandler.PATH) - .getStatistics().get("requests"); - - assertEquals("PeerSync failed. 
Had to fail back to replication", numRequestsBefore, numRequestsAfter); + // if there was no replication, we should not have replication.properties file + String replicationProperties = nodeToBringUp.jetty.getSolrHome() + "/cores/" + DEFAULT_TEST_COLLECTION_NAME + "/data/replication.properties"; + assertTrue("PeerSync failed. Had to fail back to replication", Files.notExists(Paths.get(replicationProperties))); } @@ -302,9 +287,15 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { Collection replicas = slice.getReplicas(); boolean allActive = true; - Collection replicasToCheck = null; - replicasToCheck = replicas.stream().filter(r -> nodesDown.contains(r.getName())) - .collect(Collectors.toList()); + Collection nodesDownNames = + nodesDown.stream() + .map(n -> n.coreNodeName) + .collect(Collectors.toList()); + + Collection replicasToCheck = + replicas.stream() + .filter(r -> !nodesDownNames.contains(r.getName())) + .collect(Collectors.toList()); for (Replica replica : replicasToCheck) { if (!clusterState.liveNodesContain(replica.getNodeName()) || replica.getState() != Replica.State.ACTIVE) { diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java index d04d9968b6e..7f991a4842c 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java @@ -19,6 +19,7 @@ package org.apache.solr.cloud; import java.io.File; import java.lang.invoke.MethodHandles; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.io.FileUtils; @@ -29,16 +30,20 @@ import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.Slice.State; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkStateReader; -import org.apache.solr.common.cloud.Slice.State; import org.apache.solr.core.Diagnostics; import org.apache.solr.core.MockDirectoryFactory; +import org.apache.solr.util.TimeOut; import org.apache.zookeeper.KeeperException; import org.junit.BeforeClass; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; + public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTestCase { private static final String REMOVE_VERSION_FIELD = "remove.version.field"; @@ -226,31 +231,28 @@ public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTes log.info("Collection has disappeared - collection: " + collection); } - static void waitForNewLeader(CloudSolrClient cloudClient, String shardName, Replica oldLeader, int maxWaitInSecs) + static void waitForNewLeader(CloudSolrClient cloudClient, String shardName, Replica oldLeader, TimeOut timeOut) throws Exception { - log.info("Will wait for a node to become leader for {} secs", maxWaitInSecs); - boolean waitForLeader = true; - int i = 0; + log.info("Will wait for a node to become leader for {} secs", timeOut.timeLeft(SECONDS)); ZkStateReader zkStateReader = cloudClient.getZkStateReader(); zkStateReader.forceUpdateCollection(DEFAULT_COLLECTION); - - while(waitForLeader) { + + for (; ; ) { ClusterState 
clusterState = zkStateReader.getClusterState(); DocCollection coll = clusterState.getCollection("collection1"); Slice slice = coll.getSlice(shardName); - if(slice.getLeader() != oldLeader && slice.getState() == State.ACTIVE) { - log.info("New leader got elected in {} secs", i); + if (slice.getLeader() != null && !slice.getLeader().equals(oldLeader) && slice.getState() == State.ACTIVE) { + log.info("Old leader {}, new leader. New leader got elected in {} ms", oldLeader, slice.getLeader(),timeOut.timeElapsed(MILLISECONDS) ); break; } - - if(i == maxWaitInSecs) { + + if (timeOut.hasTimedOut()) { Diagnostics.logThreadDumps("Could not find new leader in specified timeout"); zkStateReader.getZkClient().printLayoutToStdOut(); - fail("Could not find new leader even after waiting for " + maxWaitInSecs + "secs"); + fail("Could not find new leader even after waiting for " + timeOut.timeElapsed(MILLISECONDS) + "ms"); } - - i++; - Thread.sleep(1000); + + Thread.sleep(100); } } From d5652385675d12b80a58e44a8c8b392c9f70a334 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Tue, 3 Jan 2017 20:38:28 +1030 Subject: [PATCH 74/83] SOLR-9906: unused import --- .../java/org/apache/solr/cloud/AbstractDistribZkTestBase.java | 1 - 1 file changed, 1 deletion(-) diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java index 7f991a4842c..0669cbe5bb5 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java @@ -19,7 +19,6 @@ package org.apache.solr.cloud; import java.io.File; import java.lang.invoke.MethodHandles; import java.util.Map; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.io.FileUtils; From a81ebce804947685b86b50f7525335120fde38b4 Mon Sep 17 00:00:00 2001 From: Matt Weber Date: Mon, 26 Dec 2016 07:50:58 -0800 Subject: [PATCH 75/83] Support Graph Token Streams in QueryBuilder Adds support for handling graph token streams inside the QueryBuilder util class used by query parsers. 
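To make the mechanism concrete before the diffs below, here is a minimal sketch of the graph expansion this patch introduces (it mirrors the GraphTokenStreamFiniteStrings tests added later in the patch; CannedTokenStream and Token come from the Lucene test framework, and the terms are illustrative):

import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;

class GraphExpansionSketch {
  // "wifi" is stacked on "wi" (position increment 0) and spans two positions
  // (position length 2), so the stream describes a graph with two paths:
  // "fast wi fi network" and "fast wifi network". getTokenStreams() returns
  // one flat TokenStream per path, which QueryBuilder then turns into queries.
  static List<TokenStream> expand() throws IOException {
    Token wifi = new Token("wifi", 5, 10);
    wifi.setPositionIncrement(0);
    wifi.setPositionLength(2);
    TokenStream graph = new CannedTokenStream(
        new Token("fast", 0, 4),
        new Token("wi", 5, 7),
        wifi,
        new Token("fi", 8, 10),
        new Token("network", 11, 18));
    return GraphTokenStreamFiniteStrings.getTokenStreams(graph);
  }
}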
--- .../org/apache/lucene/search/GraphQuery.java | 136 +++++++++++ .../org/apache/lucene/util/QueryBuilder.java | 113 +++++++-- .../graph/GraphTokenStreamFiniteStrings.java | 230 ++++++++++++++++++ .../apache/lucene/search/TestGraphQuery.java | 79 ++++++ .../apache/lucene/util/TestQueryBuilder.java | 15 +- .../TestGraphTokenStreamFiniteStrings.java | 217 +++++++++++++++++ .../queryparser/classic/QueryParserBase.java | 42 +++- .../classic/TestMultiFieldQueryParser.java | 11 +- .../queryparser/classic/TestQueryParser.java | 119 +++++---- 9 files changed, 871 insertions(+), 91 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/GraphQuery.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestGraphQuery.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java diff --git a/lucene/core/src/java/org/apache/lucene/search/GraphQuery.java b/lucene/core/src/java/org/apache/lucene/search/GraphQuery.java new file mode 100644 index 00000000000..a1308c9cb4c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/GraphQuery.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import org.apache.lucene.index.IndexReader; + +/** + * A query that wraps multiple sub-queries generated from a graph token stream. + */ +public final class GraphQuery extends Query { + private final Query[] queries; + private boolean hasBoolean = false; + private boolean hasPhrase = false; + + /** + * Constructor sets the queries and checks if any of them are + * a boolean query. + * + * @param queries the non-null array of queries + */ + public GraphQuery(Query... queries) { + this.queries = Objects.requireNonNull(queries).clone(); + for (Query query : queries) { + if (query instanceof BooleanQuery) { + hasBoolean = true; + } else if (query instanceof PhraseQuery) { + hasPhrase = true; + } + } + } + + /** + * Gets the queries + * + * @return unmodifiable list of Query + */ + public List getQueries() { + return Collections.unmodifiableList(Arrays.asList(queries)); + } + + /** + * If there is at least one boolean query or not. + * + * @return true if there is a boolean, false if not + */ + public boolean hasBoolean() { + return hasBoolean; + } + + /** + * If there is at least one phrase query or not. 
+ * + * @return true if there is a phrase query, false if not + */ + public boolean hasPhrase() { + return hasPhrase; + } + + /** + * Rewrites to a single query or a boolean query where each query is a SHOULD clause. + */ + @Override + public Query rewrite(IndexReader reader) throws IOException { + if (queries.length == 0) { + return new BooleanQuery.Builder().build(); + } + + if (queries.length == 1) { + return queries[0]; + } + + BooleanQuery.Builder q = new BooleanQuery.Builder(); + for (Query clause : queries) { + q.add(clause, BooleanClause.Occur.SHOULD); + } + + return q.build(); + } + + @Override + public String toString(String field) { + StringBuilder builder = new StringBuilder("Graph("); + for (int i = 0; i < queries.length; i++) { + if (i != 0) { + builder.append(", "); + } + builder.append(Objects.toString(queries[i])); + } + + if (queries.length > 0) { + builder.append(", "); + } + + builder.append("hasBoolean=") + .append(hasBoolean) + .append(", hasPhrase=") + .append(hasPhrase) + .append(")"); + + return builder.toString(); + } + + @Override + public boolean equals(Object other) { + return sameClassAs(other) && + hasBoolean == ((GraphQuery) other).hasBoolean && + hasPhrase == ((GraphQuery) other).hasPhrase && + Arrays.equals(queries, ((GraphQuery) other).queries); + } + + @Override + public int hashCode() { + return 31 * classHash() + Arrays.deepHashCode(new Object[]{hasBoolean, hasPhrase, queries}); + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 6c5ea15aac5..a8c0a82e156 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -25,15 +25,18 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.GraphQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; /** * Creates queries from the {@link Analyzer} chain. 
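As a reference point before the QueryBuilder hunks that follow, the rewrite contract of the GraphQuery class added above can be sketched as follows (field and term values are illustrative; this mirrors the TestGraphQuery rewrite test further below):

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.GraphQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class GraphQueryRewriteSketch {
  // A GraphQuery with several sub-queries rewrites to a BooleanQuery whose
  // SHOULD clauses are the individual paths; a single sub-query rewrites to
  // itself, and an empty GraphQuery to an empty BooleanQuery.
  static Query rewritten() throws IOException {
    GraphQuery graph = new GraphQuery(
        new TermQuery(new Term("body", "wifi")),
        new TermQuery(new Term("body", "wireless")));
    return graph.rewrite(null); // a BooleanQuery with two SHOULD clauses
  }
}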
@@ -135,17 +138,39 @@ public class QueryBuilder { Query query = createFieldQuery(analyzer, BooleanClause.Occur.SHOULD, field, queryText, false, 0); if (query instanceof BooleanQuery) { - BooleanQuery bq = (BooleanQuery) query; - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.setMinimumNumberShouldMatch((int) (fraction * bq.clauses().size())); - for (BooleanClause clause : bq) { - builder.add(clause); + query = addMinShouldMatchToBoolean((BooleanQuery) query, fraction); + } else if (query instanceof GraphQuery && ((GraphQuery) query).hasBoolean()) { + // we have a graph query that has at least one boolean sub-query + // re-build and set minimum should match on each boolean found + List oldQueries = ((GraphQuery) query).getQueries(); + Query[] queries = new Query[oldQueries.size()]; + for (int i = 0; i < queries.length; i++) { + Query oldQuery = oldQueries.get(i); + if (oldQuery instanceof BooleanQuery) { + queries[i] = addMinShouldMatchToBoolean((BooleanQuery) oldQuery, fraction); + } else { + queries[i] = oldQuery; + } } - query = builder.build(); + + query = new GraphQuery(queries); } return query; } - + + /** + * Rebuilds a boolean query and sets a new minimum number should match value. + */ + private BooleanQuery addMinShouldMatchToBoolean(BooleanQuery query, float fraction) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch((int) (fraction * query.clauses().size())); + for (BooleanClause clause : query) { + builder.add(clause); + } + + return builder.build(); + } + /** * Returns the analyzer. * @see #setAnalyzer(Analyzer) @@ -183,6 +208,7 @@ public class QueryBuilder { this.enablePositionIncrements = enable; } + /** * Creates a query from the analysis chain. *

    @@ -192,25 +218,44 @@ public class QueryBuilder { * it is usually not necessary to override it in a subclass; instead, override * methods like {@link #newBooleanQuery}, etc., if possible. * - * @param analyzer analyzer used for this query - * @param operator default boolean operator used for this query - * @param field field to create queries against - * @param queryText text to be passed to the analysis chain - * @param quoted true if phrases should be generated when terms occur at more than one position + * @param analyzer analyzer used for this query + * @param operator default boolean operator used for this query + * @param field field to create queries against + * @param queryText text to be passed to the analysis chain + * @param quoted true if phrases should be generated when terms occur at more than one position * @param phraseSlop slop factor for phrase/multiphrase queries */ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) { assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; - + // Use the analyzer to get all the tokens, and then build an appropriate // query based on the analysis chain. - - try (TokenStream source = analyzer.tokenStream(field, queryText); - CachingTokenFilter stream = new CachingTokenFilter(source)) { + try (TokenStream source = analyzer.tokenStream(field, queryText)) { + return createFieldQuery(source, operator, field, quoted, phraseSlop); + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); + } + } + + /** + * Creates a query from a token stream. + * + * @param source the token stream to create the query from + * @param operator default boolean operator used for this query + * @param field field to create queries against + * @param quoted true if phrases should be generated when terms occur at more than one position + * @param phraseSlop slop factor for phrase/multiphrase queries + */ + protected Query createFieldQuery(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, int phraseSlop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + + // Build an appropriate query based on the analysis chain. 
+ try (CachingTokenFilter stream = new CachingTokenFilter(source)) { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); - + PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); + if (termAtt == null) { return null; } @@ -221,6 +266,7 @@ public class QueryBuilder { int numTokens = 0; int positionCount = 0; boolean hasSynonyms = false; + boolean isGraph = false; stream.reset(); while (stream.incrementToken()) { @@ -231,6 +277,11 @@ public class QueryBuilder { } else { hasSynonyms = true; } + + int positionLength = posLenAtt.getPositionLength(); + if (!isGraph && positionLength > 1) { + isGraph = true; + } } // phase 2: based on token count, presence of synonyms, and options @@ -241,6 +292,9 @@ public class QueryBuilder { } else if (numTokens == 1) { // single term return analyzeTerm(field, stream); + } else if (isGraph) { + // graph + return analyzeGraph(stream, operator, field, quoted, phraseSlop); } else if (quoted && positionCount > 1) { // phrase if (hasSynonyms) { @@ -388,7 +442,30 @@ public class QueryBuilder { } return mpqb.build(); } - + + /** + * Creates a query from a graph token stream by extracting all the finite strings from the graph and using them to create the query. + */ + protected Query analyzeGraph(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, int phraseSlop) + throws IOException { + source.reset(); + List tokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(source); + + if (tokenStreams.isEmpty()) { + return null; + } + + List queries = new ArrayList<>(tokenStreams.size()); + for (TokenStream ts : tokenStreams) { + Query query = createFieldQuery(ts, operator, field, quoted, phraseSlop); + if (query != null) { + queries.add(query); + } + } + + return new GraphQuery(queries.toArray(new Query[0])); + } + /** * Builds a new BooleanQuery instance. *

    diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java new file mode 100644 index 00000000000..cec65fadfad --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util.graph; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.FiniteStringsIterator; +import org.apache.lucene.util.automaton.Operations; + +import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; + +/** + * Creates a list of {@link TokenStream} where each stream is the tokens that make up a finite string in graph token stream. To do this, + * the graph token stream is converted to an {@link Automaton} and from there we use a {@link FiniteStringsIterator} to collect the various + * token streams for each finite string. 
+ */ +public final class GraphTokenStreamFiniteStrings { + private final Automaton.Builder builder = new Automaton.Builder(); + private final Map termToID = new HashMap<>(); + private final Map idToTerm = new HashMap<>(); + private final Map idToInc = new HashMap<>(); + private Automaton det; + + private class FiniteStringsTokenStream extends TokenStream { + private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + private final IntsRef ids; + private final int end; + private int offset; + + FiniteStringsTokenStream(final IntsRef ids) { + assert ids != null; + this.ids = ids; + this.offset = ids.offset; + this.end = ids.offset + ids.length; + } + + @Override + public boolean incrementToken() throws IOException { + if (offset < end) { + clearAttributes(); + int id = ids.ints[offset]; + termAtt.setBytesRef(idToTerm.get(id)); + + int incr = 1; + if (idToInc.containsKey(id)) { + incr = idToInc.get(id); + } + posIncAtt.setPositionIncrement(incr); + offset++; + return true; + } + + return false; + } + } + + private GraphTokenStreamFiniteStrings() { + } + + /** + * Gets the list of finite string token streams from the given input graph token stream. + */ + public static List getTokenStreams(final TokenStream in) throws IOException { + GraphTokenStreamFiniteStrings gfs = new GraphTokenStreamFiniteStrings(); + return gfs.process(in); + } + + /** + * Builds automaton and builds the finite string token streams. + */ + private List process(final TokenStream in) throws IOException { + build(in); + + List tokenStreams = new ArrayList<>(); + final FiniteStringsIterator finiteStrings = new FiniteStringsIterator(det); + for (IntsRef ids; (ids = finiteStrings.next()) != null; ) { + tokenStreams.add(new FiniteStringsTokenStream(IntsRef.deepCopyOf(ids))); + } + + return tokenStreams; + } + + private void build(final TokenStream in) throws IOException { + if (det != null) { + throw new IllegalStateException("Automation already built"); + } + + final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class); + final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class); + final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); + + in.reset(); + + int pos = -1; + int prevIncr = 1; + int state = -1; + while (in.incrementToken()) { + int currentIncr = posIncAtt.getPositionIncrement(); + if (pos == -1 && currentIncr < 1) { + throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1"); + } + + // always use inc 1 while building, but save original increment + int incr = Math.min(1, currentIncr); + if (incr > 0) { + pos += incr; + } + + int endPos = pos + posLengthAtt.getPositionLength(); + while (state < endPos) { + state = createState(); + } + + BytesRef term = termBytesAtt.getBytesRef(); + int id = getTermID(currentIncr, prevIncr, term); + addTransition(pos, endPos, currentIncr, id); + + // only save last increment on non-zero increment in case we have multiple stacked tokens + if (currentIncr > 0) { + prevIncr = currentIncr; + } + } + + in.end(); + setAccept(state, true); + finish(); + } + + /** + * Returns a new state; state 0 is always the initial state. + */ + private int createState() { + return builder.createState(); + } + + /** + * Marks the specified state as accept or not. 
+ */ + private void setAccept(int state, boolean accept) { + builder.setAccept(state, accept); + } + + /** + * Adds a transition to the automaton. + */ + private void addTransition(int source, int dest, int incr, int id) { + builder.addTransition(source, dest, id); + } + + /** + * Call this once you are done adding states/transitions. + */ + private void finish() { + finish(DEFAULT_MAX_DETERMINIZED_STATES); + } + + /** + * Call this once you are done adding states/transitions. + * + * @param maxDeterminizedStates Maximum number of states created when determinizing the automaton. Higher numbers allow this operation + * to consume more memory but allow more complex automatons. + */ + private void finish(int maxDeterminizedStates) { + Automaton automaton = builder.finish(); + det = Operations.removeDeadStates(Operations.determinize(automaton, maxDeterminizedStates)); + } + + /** + * Gets an integer id for a given term. + * + * If there is no position gaps for this token then we can reuse the id for the same term if it appeared at another + * position without a gap. If we have a position gap generate a new id so we can keep track of the position + * increment. + */ + private int getTermID(int incr, int prevIncr, BytesRef term) { + assert term != null; + boolean isStackedGap = incr == 0 && prevIncr > 1; + boolean hasGap = incr > 1; + Integer id; + if (hasGap || isStackedGap) { + id = idToTerm.size(); + idToTerm.put(id, BytesRef.deepCopyOf(term)); + + // stacked token should have the same increment as original token at this position + if (isStackedGap) { + idToInc.put(id, prevIncr); + } else { + idToInc.put(id, incr); + } + } else { + id = termToID.get(term); + if (id == null) { + term = BytesRef.deepCopyOf(term); + id = idToTerm.size(); + termToID.put(term, id); + idToTerm.put(id, term); + } + } + + return id; + } +} \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/search/TestGraphQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestGraphQuery.java new file mode 100644 index 00000000000..412fac4654c --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestGraphQuery.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search; + + +import java.io.IOException; + +import org.apache.lucene.index.Term; +import org.apache.lucene.util.LuceneTestCase; + +public class TestGraphQuery extends LuceneTestCase { + + public void testEquals() { + QueryUtils.checkEqual(new GraphQuery(), new GraphQuery()); + QueryUtils.checkEqual(new GraphQuery(new MatchAllDocsQuery()), new GraphQuery(new MatchAllDocsQuery())); + QueryUtils.checkEqual( + new GraphQuery(new TermQuery(new Term("a", "a")), new TermQuery(new Term("a", "b"))), + new GraphQuery(new TermQuery(new Term("a", "a")), new TermQuery(new Term("a", "b"))) + ); + } + + public void testBooleanDetection() { + assertFalse(new GraphQuery().hasBoolean()); + assertFalse(new GraphQuery(new MatchAllDocsQuery(), new TermQuery(new Term("a", "a"))).hasBoolean()); + assertTrue(new GraphQuery(new BooleanQuery.Builder().build()).hasBoolean()); + assertTrue(new GraphQuery(new TermQuery(new Term("a", "a")), new BooleanQuery.Builder().build()).hasBoolean()); + } + + public void testPhraseDetection() { + assertFalse(new GraphQuery().hasPhrase()); + assertFalse(new GraphQuery(new MatchAllDocsQuery(), new TermQuery(new Term("a", "a"))).hasPhrase()); + assertTrue(new GraphQuery(new PhraseQuery.Builder().build()).hasPhrase()); + assertTrue(new GraphQuery(new TermQuery(new Term("a", "a")), new PhraseQuery.Builder().build()).hasPhrase()); + } + + public void testToString() { + assertEquals("Graph(hasBoolean=false, hasPhrase=false)", new GraphQuery().toString()); + assertEquals("Graph(a:a, a:b, hasBoolean=true, hasPhrase=false)", + new GraphQuery(new TermQuery(new Term("a", "a")), + new BooleanQuery.Builder().add(new TermQuery(new Term("a", "b")), BooleanClause.Occur.SHOULD) + .build()).toString()); + assertEquals("Graph(a:\"a b\", a:b, hasBoolean=true, hasPhrase=true)", + new GraphQuery( + new PhraseQuery.Builder() + .add(new Term("a", "a")) + .add(new Term("a", "b")).build(), + new BooleanQuery.Builder().add(new TermQuery(new Term("a", "b")), BooleanClause.Occur.SHOULD) + .build()).toString()); + } + + public void testRewrite() throws IOException { + QueryUtils.checkEqual(new BooleanQuery.Builder().build(), new GraphQuery().rewrite(null)); + QueryUtils.checkEqual(new TermQuery(new Term("a", "a")), + new GraphQuery(new TermQuery(new Term("a", "a"))).rewrite(null)); + QueryUtils.checkEqual( + new BooleanQuery.Builder() + .add(new TermQuery(new Term("a", "a")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("b", "b")), BooleanClause.Occur.SHOULD).build(), + new GraphQuery( + new TermQuery(new Term("a", "a")), + new TermQuery(new Term("b", "b")) + ).rewrite(null) + ); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index d3019e3d077..9cd839027ed 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -31,6 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.GraphQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; @@ -150,13 +151,17 @@ public class TestQueryBuilder extends LuceneTestCase { assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "old 
dogs")); } - /** forms multiphrase query */ + /** forms graph query */ public void testMultiWordSynonymsPhrase() throws Exception { - MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder(); - expectedBuilder.add(new Term[] { new Term("field", "guinea"), new Term("field", "cavy") }); - expectedBuilder.add(new Term("field", "pig")); + PhraseQuery.Builder expectedPhrase = new PhraseQuery.Builder(); + expectedPhrase.add(new Term("field", "guinea")); + expectedPhrase.add(new Term("field", "pig")); + + TermQuery expectedTerm = new TermQuery(new Term("field", "cavy")); + QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer()); - assertEquals(expectedBuilder.build(), queryBuilder.createPhraseQuery("field", "guinea pig")); + assertEquals(new GraphQuery(expectedPhrase.build(), expectedTerm), + queryBuilder.createPhraseQuery("field", "guinea pig")); } protected static class SimpleCJKTokenizer extends Tokenizer { diff --git a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java new file mode 100644 index 00000000000..4e636e249dc --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util.graph; + +import java.util.List; + +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.util.LuceneTestCase; + +/** + * {@link GraphTokenStreamFiniteStrings} tests. 
+ */ +public class TestGraphTokenStreamFiniteStrings extends LuceneTestCase { + + private static Token token(String term, int posInc, int posLength) { + final Token t = new Token(term, 0, term.length()); + t.setPositionIncrement(posInc); + t.setPositionLength(posLength); + return t; + } + + private void assertTokenStream(TokenStream ts, String[] terms, int[] increments) throws Exception { + // verify no nulls and arrays same length + assertNotNull(ts); + assertNotNull(terms); + assertNotNull(increments); + assertEquals(terms.length, increments.length); + BytesTermAttribute termAtt = ts.getAttribute(BytesTermAttribute.class); + PositionIncrementAttribute incrAtt = ts.getAttribute(PositionIncrementAttribute.class); + int offset = 0; + while (ts.incrementToken()) { + // verify term and increment + assert offset < terms.length; + assertEquals(terms[offset], termAtt.getBytesRef().utf8ToString()); + assertEquals(increments[offset], incrAtt.getPositionIncrement()); + offset++; + } + + // make sure we processed all items + assertEquals(offset, terms.length); + } + + public void testIllegalState() throws Exception { + expectThrows(IllegalStateException.class, () -> { + TokenStream ts = new CannedTokenStream( + token("a", 0, 1), + token("b", 1, 1) + ); + + GraphTokenStreamFiniteStrings.getTokenStreams(ts); + }); + } + + public void testSingleGraph() throws Exception { + TokenStream ts = new CannedTokenStream( + token("fast", 1, 1), + token("wi", 1, 1), + token("wifi", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(2, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), new String[]{"fast", "wi", "fi", "network"}, new int[]{1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), new String[]{"fast", "wifi", "network"}, new int[]{1, 1, 1}); + } + + public void testSingleGraphWithGap() throws Exception { + // "hey the fast wifi network", where "the" removed + TokenStream ts = new CannedTokenStream( + token("hey", 1, 1), + token("fast", 2, 1), + token("wi", 1, 1), + token("wifi", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(2, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), + new String[]{"hey", "fast", "wi", "fi", "network"}, new int[]{1, 2, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), + new String[]{"hey", "fast", "wifi", "network"}, new int[]{1, 2, 1, 1}); + } + + + public void testGraphAndGapSameToken() throws Exception { + TokenStream ts = new CannedTokenStream( + token("fast", 1, 1), + token("wi", 2, 1), + token("wifi", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(2, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), new String[]{"fast", "wi", "fi", "network"}, new int[]{1, 2, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), new String[]{"fast", "wifi", "network"}, new int[]{1, 2, 1}); + } + + public void testGraphAndGapSameTokenTerm() throws Exception { + TokenStream ts = new CannedTokenStream( + token("a", 1, 1), + token("b", 1, 1), + token("c", 2, 1), + token("a", 0, 2), + token("d", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(2, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), new 
String[]{"a", "b", "c", "d"}, new int[]{1, 1, 2, 1}); + assertTokenStream(finiteTokenStreams.get(1), new String[]{"a", "b", "a"}, new int[]{1, 1, 2}); + } + + public void testStackedGraph() throws Exception { + TokenStream ts = new CannedTokenStream( + token("fast", 1, 1), + token("wi", 1, 1), + token("wifi", 0, 2), + token("wireless", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(3, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), new String[]{"fast", "wi", "fi", "network"}, new int[]{1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), new String[]{"fast", "wifi", "network"}, new int[]{1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(2), new String[]{"fast", "wireless", "network"}, new int[]{1, 1, 1}); + } + + public void testStackedGraphWithGap() throws Exception { + TokenStream ts = new CannedTokenStream( + token("fast", 1, 1), + token("wi", 2, 1), + token("wifi", 0, 2), + token("wireless", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(3, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), new String[]{"fast", "wi", "fi", "network"}, new int[]{1, 2, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), new String[]{"fast", "wifi", "network"}, new int[]{1, 2, 1}); + assertTokenStream(finiteTokenStreams.get(2), new String[]{"fast", "wireless", "network"}, new int[]{1, 2, 1}); + } + + public void testGraphWithRegularSynonym() throws Exception { + TokenStream ts = new CannedTokenStream( + token("fast", 1, 1), + token("speedy", 0, 1), + token("wi", 1, 1), + token("wifi", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(4, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), new String[]{"fast", "wi", "fi", "network"}, new int[]{1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), new String[]{"fast", "wifi", "network"}, new int[]{1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(2), new String[]{"speedy", "wi", "fi", "network"}, new int[]{1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(3), new String[]{"speedy", "wifi", "network"}, new int[]{1, 1, 1}); + } + + public void testMultiGraph() throws Exception { + TokenStream ts = new CannedTokenStream( + token("turbo", 1, 1), + token("fast", 0, 2), + token("charged", 1, 1), + token("wi", 1, 1), + token("wifi", 0, 2), + token("fi", 1, 1), + token("network", 1, 1) + ); + + List finiteTokenStreams = GraphTokenStreamFiniteStrings.getTokenStreams(ts); + + assertEquals(4, finiteTokenStreams.size()); + assertTokenStream(finiteTokenStreams.get(0), + new String[]{"turbo", "charged", "wi", "fi", "network"}, new int[]{1, 1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(1), + new String[]{"turbo", "charged", "wifi", "network"}, new int[]{1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(2), new String[]{"fast", "wi", "fi", "network"}, new int[]{1, 1, 1, 1}); + assertTokenStream(finiteTokenStreams.get(3), new String[]{"fast", "wifi", "network"}, new int[]{1, 1, 1}); + } +} diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index 41d3764f0ac..9b238d87eff 100644 --- 
a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -475,8 +475,6 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop); } - - /** * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. * This method may be overridden, for example, to return @@ -489,26 +487,48 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer Query query = getFieldQuery(field, queryText, true); if (query instanceof PhraseQuery) { - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - builder.setSlop(slop); - PhraseQuery pq = (PhraseQuery) query; - org.apache.lucene.index.Term[] terms = pq.getTerms(); - int[] positions = pq.getPositions(); - for (int i = 0; i < terms.length; ++i) { - builder.add(terms[i], positions[i]); - } - query = builder.build(); + query = addSlopToPhrase((PhraseQuery) query, slop); } else if (query instanceof MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery)query; if (slop != mpq.getSlop()) { query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build(); } + } else if (query instanceof GraphQuery && ((GraphQuery) query).hasPhrase()) { + // we have a graph query that has at least one phrase sub-query + // re-build and set slop on all phrase queries + List oldQueries = ((GraphQuery) query).getQueries(); + Query[] queries = new Query[oldQueries.size()]; + for (int i = 0; i < queries.length; i++) { + Query oldQuery = oldQueries.get(i); + if (oldQuery instanceof PhraseQuery) { + queries[i] = addSlopToPhrase((PhraseQuery) oldQuery, slop); + } else { + queries[i] = oldQuery; + } + } + + query = new GraphQuery(queries); } return query; } + /** + * Rebuild a phrase query with a slop value + */ + private PhraseQuery addSlopToPhrase(PhraseQuery query, int slop) { + PhraseQuery.Builder builder = new PhraseQuery.Builder(); + builder.setSlop(slop); + org.apache.lucene.index.Term[] terms = query.getTerms(); + int[] positions = query.getPositions(); + for (int i = 0; i < terms.length; ++i) { + builder.add(terms[i], positions[i]); + } + + return builder.build(); + } + protected Query getRangeQuery(String field, String part1, String part2, diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java index ed76ff5009f..ae15284d45b 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java @@ -21,15 +21,19 @@ import java.io.StringReader; import java.util.HashMap; import java.util.Map; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockSynonymFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause.Occur; import 
org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -347,7 +351,8 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { assertEquals("Synonym(b:dog b:dogs) Synonym(t:dog t:dogs)", q.toString()); q = parser.parse("guinea pig"); assertFalse(parser.getSplitOnWhitespace()); - assertEquals("(Synonym(b:cavy b:guinea) Synonym(t:cavy t:guinea)) (b:pig t:pig)", q.toString()); + assertEquals("Graph(b:guinea b:pig, b:cavy, hasBoolean=true, hasPhrase=false) " + + "Graph(t:guinea t:pig, t:cavy, hasBoolean=true, hasPhrase=false)", q.toString()); parser.setSplitOnWhitespace(true); q = parser.parse("guinea pig"); assertEquals("(b:guinea t:guinea) (b:pig t:pig)", q.toString()); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java index bb976249bf6..87bc89fa4f7 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.queryparser.classic; +import java.io.IOException; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockBytesAnalyzer; @@ -27,10 +29,10 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.RandomIndexWriter; @@ -41,6 +43,7 @@ import org.apache.lucene.queryparser.util.QueryParserTestBase; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.GraphQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; @@ -51,8 +54,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; -import java.io.IOException; - /** * Tests QueryParser. 
*/ @@ -502,32 +503,34 @@ public class TestQueryParser extends QueryParserTestBase { QueryParser dumb = new QueryParser("field", new Analyzer1()); dumb.setSplitOnWhitespace(false); - // A multi-word synonym source will form a synonym query for the same-starting-position tokens - BooleanQuery.Builder multiWordExpandedBqBuilder = new BooleanQuery.Builder(); - Query multiWordSynonymQuery = new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy")); - multiWordExpandedBqBuilder.add(multiWordSynonymQuery, BooleanClause.Occur.SHOULD); - multiWordExpandedBqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD); - Query multiWordExpandedBq = multiWordExpandedBqBuilder.build(); - assertEquals(multiWordExpandedBq, dumb.parse("guinea pig")); + TermQuery guinea = new TermQuery(new Term("field", "guinea")); + TermQuery pig = new TermQuery(new Term("field", "pig")); + TermQuery cavy = new TermQuery(new Term("field", "cavy")); - // With the phrase operator, a multi-word synonym source will form a multiphrase query. - // When the number of expanded term(s) is different from that of the original term(s), this is not good. - MultiPhraseQuery.Builder multiWordExpandedMpqBuilder = new MultiPhraseQuery.Builder(); - multiWordExpandedMpqBuilder.add(new Term[]{new Term("field", "guinea"), new Term("field", "cavy")}); - multiWordExpandedMpqBuilder.add(new Term("field", "pig")); - Query multiWordExpandedMPQ = multiWordExpandedMpqBuilder.build(); - assertEquals(multiWordExpandedMPQ, dumb.parse("\"guinea pig\"")); + // A multi-word synonym source will form a graph query for synonyms that formed the graph token stream + BooleanQuery.Builder synonym = new BooleanQuery.Builder(); + synonym.add(guinea, BooleanClause.Occur.SHOULD); + synonym.add(pig, BooleanClause.Occur.SHOULD); + BooleanQuery guineaPig = synonym.build(); + + GraphQuery graphQuery = new GraphQuery(guineaPig, cavy); + assertEquals(graphQuery, dumb.parse("guinea pig")); + + // With the phrase operator, a multi-word synonym source will form a graph query with inner phrase queries. 
+ PhraseQuery.Builder phraseSynonym = new PhraseQuery.Builder(); + phraseSynonym.add(new Term("field", "guinea")); + phraseSynonym.add(new Term("field", "pig")); + PhraseQuery guineaPigPhrase = phraseSynonym.build(); + + graphQuery = new GraphQuery(guineaPigPhrase, cavy); + assertEquals(graphQuery, dumb.parse("\"guinea pig\"")); // custom behavior, the synonyms are expanded, unless you use quote operator QueryParser smart = new SmartQueryParser(); smart.setSplitOnWhitespace(false); - assertEquals(multiWordExpandedBq, smart.parse("guinea pig")); - - PhraseQuery.Builder multiWordUnexpandedPqBuilder = new PhraseQuery.Builder(); - multiWordUnexpandedPqBuilder.add(new Term("field", "guinea")); - multiWordUnexpandedPqBuilder.add(new Term("field", "pig")); - Query multiWordUnexpandedPq = multiWordUnexpandedPqBuilder.build(); - assertEquals(multiWordUnexpandedPq, smart.parse("\"guinea pig\"")); + graphQuery = new GraphQuery(guineaPig, cavy); + assertEquals(graphQuery, smart.parse("guinea pig")); + assertEquals(guineaPigPhrase, smart.parse("\"guinea pig\"")); } // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability @@ -580,34 +583,34 @@ public class TestQueryParser extends QueryParserTestBase { assertQueryEquals("guinea /pig/", a, "guinea /pig/"); // Operators should not interrupt multiword analysis if not don't associate - assertQueryEquals("(guinea pig)", a, "Synonym(cavy guinea) pig"); - assertQueryEquals("+(guinea pig)", a, "+(Synonym(cavy guinea) pig)"); - assertQueryEquals("-(guinea pig)", a, "-(Synonym(cavy guinea) pig)"); - assertQueryEquals("!(guinea pig)", a, "-(Synonym(cavy guinea) pig)"); - assertQueryEquals("NOT (guinea pig)", a, "-(Synonym(cavy guinea) pig)"); - assertQueryEquals("(guinea pig)^2", a, "(Synonym(cavy guinea) pig)^2.0"); + assertQueryEquals("(guinea pig)", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("+(guinea pig)", a, "+Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("-(guinea pig)", a, "-Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("!(guinea pig)", a, "-Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("NOT (guinea pig)", a, "-Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("(guinea pig)^2", a, "(Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false))^2.0"); - assertQueryEquals("field:(guinea pig)", a, "Synonym(cavy guinea) pig"); + assertQueryEquals("field:(guinea pig)", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); - assertQueryEquals("+small guinea pig", a, "+small Synonym(cavy guinea) pig"); - assertQueryEquals("-small guinea pig", a, "-small Synonym(cavy guinea) pig"); - assertQueryEquals("!small guinea pig", a, "-small Synonym(cavy guinea) pig"); - assertQueryEquals("NOT small guinea pig", a, "-small Synonym(cavy guinea) pig"); - assertQueryEquals("small* guinea pig", a, "small* Synonym(cavy guinea) pig"); - assertQueryEquals("small? guinea pig", a, "small? 
Synonym(cavy guinea) pig"); - assertQueryEquals("\"small\" guinea pig", a, "small Synonym(cavy guinea) pig"); + assertQueryEquals("+small guinea pig", a, "+small Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("-small guinea pig", a, "-small Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("!small guinea pig", a, "-small Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("NOT small guinea pig", a, "-small Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("small* guinea pig", a, "small* Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("small? guinea pig", a, "small? Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); + assertQueryEquals("\"small\" guinea pig", a, "small Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); - assertQueryEquals("guinea pig +running", a, "Synonym(cavy guinea) pig +running"); - assertQueryEquals("guinea pig -running", a, "Synonym(cavy guinea) pig -running"); - assertQueryEquals("guinea pig !running", a, "Synonym(cavy guinea) pig -running"); - assertQueryEquals("guinea pig NOT running", a, "Synonym(cavy guinea) pig -running"); - assertQueryEquals("guinea pig running*", a, "Synonym(cavy guinea) pig running*"); - assertQueryEquals("guinea pig running?", a, "Synonym(cavy guinea) pig running?"); - assertQueryEquals("guinea pig \"running\"", a, "Synonym(cavy guinea) pig running"); + assertQueryEquals("guinea pig +running", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) +running"); + assertQueryEquals("guinea pig -running", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) -running"); + assertQueryEquals("guinea pig !running", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) -running"); + assertQueryEquals("guinea pig NOT running", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) -running"); + assertQueryEquals("guinea pig running*", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) running*"); + assertQueryEquals("guinea pig running?", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) running?"); + assertQueryEquals("guinea pig \"running\"", a, "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false) running"); - assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2"); + assertQueryEquals("\"guinea pig\"~2", a, "Graph(field:\"guinea pig\"~2, field:cavy, hasBoolean=false, hasPhrase=true)"); - assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\""); + assertQueryEquals("field:\"guinea pig\"", a, "Graph(field:\"guinea pig\", field:cavy, hasBoolean=false, hasPhrase=true)"); splitOnWhitespace = oldSplitOnWhitespace; } @@ -684,9 +687,9 @@ public class TestQueryParser extends QueryParserTestBase { assertQueryEquals("guinea pig running?", a, "guinea pig running?"); assertQueryEquals("guinea pig \"running\"", a, "guinea pig running"); - assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2"); + assertQueryEquals("\"guinea pig\"~2", a, "Graph(field:\"guinea pig\"~2, field:cavy, hasBoolean=false, hasPhrase=true)"); - assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\""); + assertQueryEquals("field:\"guinea pig\"", a, 
"Graph(field:\"guinea pig\", field:cavy, hasBoolean=false, hasPhrase=true)"); splitOnWhitespace = oldSplitOnWhitespace; } @@ -697,14 +700,22 @@ public class TestQueryParser extends QueryParserTestBase { assertFalse(parser.getSplitOnWhitespace()); // default is false // A multi-word synonym source will form a synonym query for the same-starting-position tokens - BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD); - assertEquals(bqBuilder.build(), parser.parse("guinea pig")); + TermQuery guinea = new TermQuery(new Term("field", "guinea")); + TermQuery pig = new TermQuery(new Term("field", "pig")); + TermQuery cavy = new TermQuery(new Term("field", "cavy")); + + // A multi-word synonym source will form a graph query for synonyms that formed the graph token stream + BooleanQuery.Builder synonym = new BooleanQuery.Builder(); + synonym.add(guinea, BooleanClause.Occur.SHOULD); + synonym.add(pig, BooleanClause.Occur.SHOULD); + BooleanQuery guineaPig = synonym.build(); + + GraphQuery graphQuery = new GraphQuery(guineaPig, cavy); + assertEquals(graphQuery, parser.parse("guinea pig")); boolean oldSplitOnWhitespace = splitOnWhitespace; splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE; - assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "Synonym(cavy guinea) pig"); + assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "Graph(field:guinea field:pig, field:cavy, hasBoolean=true, hasPhrase=false)"); splitOnWhitespace = oldSplitOnWhitespace; } From 1bcf9a251d597cdc029295325b287ce5ce661bec Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 3 Jan 2017 05:15:37 -0500 Subject: [PATCH 76/83] LUCENE-7603: add CHANGES entry --- lucene/CHANGES.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4c49560620a..474080cf275 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -94,6 +94,10 @@ New features expose per-segment LongValues or DoubleValues iterators, similar to the existing DocValues iterator API. (Alan Woodward, Adrien Grand) +* LUCENE-7603: Graph token streams are now handled accurately by query + parsers, by enumerating all paths and creating the corresponding + query/ies as sub-clauses (Matt Weber via Mike McCandless) + Bug Fixes * LUCENE-7547: JapaneseTokenizerFactory was failing to close the From 3c963967242aed73a906b7bc17c26a4b8b07083c Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Tue, 3 Jan 2017 15:52:01 +0530 Subject: [PATCH 77/83] SOLR-9896: Instrument and collect metrics from query, update, core admin and core load thread pools --- solr/CHANGES.txt | 2 ++ .../java/org/apache/solr/core/CoreContainer.java | 15 ++++++++++++--- .../solr/handler/admin/CoreAdminHandler.java | 13 ++++++++++++- .../component/HttpShardHandlerFactory.java | 10 +++++++--- .../apache/solr/update/UpdateShardHandler.java | 9 ++++++++- .../org/apache/solr/util/stats/MetricUtils.java | 9 +++++++++ 6 files changed, 50 insertions(+), 8 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 6326e547113..8609f9139bc 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -210,6 +210,8 @@ New Features * SOLR-9684: Add priority Streaming Expression (Joel Bernstein, David Smiley) +* SOLR-9896: Instrument and collect metrics from query, update, core admin and core load thread pools. 
(shalin) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index f3747dcb9f6..de7c34d8a70 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -78,6 +78,7 @@ import org.apache.solr.security.SecurityPluginHolder; import org.apache.solr.update.SolrCoreState; import org.apache.solr.update.UpdateShardHandler; import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.util.stats.MetricUtils; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -462,6 +463,11 @@ public class CoreContainer { metricManager = new SolrMetricManager(); + coreContainerWorkExecutor = MetricUtils.instrumentedExecutorService( + coreContainerWorkExecutor, + metricManager.registry(SolrMetricManager.getRegistryName(SolrInfoMBean.Group.node)), + SolrMetricManager.mkName("coreContainerWorkExecutor", "threadPool")); + shardHandlerFactory = ShardHandlerFactory.newInstance(cfg.getShardHandlerFactoryPluginInfo(), loader); if (shardHandlerFactory instanceof SolrMetricProducer) { SolrMetricProducer metricProducer = (SolrMetricProducer) shardHandlerFactory; @@ -520,9 +526,12 @@ public class CoreContainer { unloadedCores, true, "unloaded", "cores"); // setup executor to load cores in parallel - ExecutorService coreLoadExecutor = ExecutorUtil.newMDCAwareFixedThreadPool( - cfg.getCoreLoadThreadCount(isZooKeeperAware()), - new DefaultSolrThreadFactory("coreLoadExecutor") ); + ExecutorService coreLoadExecutor = MetricUtils.instrumentedExecutorService( + ExecutorUtil.newMDCAwareFixedThreadPool( + cfg.getCoreLoadThreadCount(isZooKeeperAware()), + new DefaultSolrThreadFactory("coreLoadExecutor")), + metricManager.registry(SolrMetricManager.getRegistryName(SolrInfoMBean.Group.node)), + SolrMetricManager.mkName("coreLoadExecutor", "threadPool")); final List> futures = new ArrayList<>(); try { List cds = coresLocator.discover(this); diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java index 7b0ecfb7633..458b7a5eefc 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java @@ -18,6 +18,7 @@ package org.apache.solr.handler.admin; import java.io.File; import java.lang.invoke.MethodHandles; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -42,11 +43,13 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.handler.RequestHandlerBase; +import org.apache.solr.metrics.SolrMetricManager; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.security.AuthorizationContext; import org.apache.solr.security.PermissionNameProvider; import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.util.stats.MetricUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -65,7 +68,7 @@ public class CoreAdminHandler extends RequestHandlerBase implements PermissionNa protected final CoreContainer coreContainer; protected 
final Map> requestStatusMap; - protected final ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(50, + protected ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(50, new DefaultSolrThreadFactory("parallelCoreAdminExecutor")); protected static int MAX_TRACKED_REQUESTS = 100; @@ -111,6 +114,14 @@ public class CoreAdminHandler extends RequestHandlerBase implements PermissionNa "it is a special Handler configured directly by the RequestDispatcher"); } + @Override + public Collection initializeMetrics(SolrMetricManager manager, String registryName, String scope) { + Collection metrics = super.initializeMetrics(manager, registryName, scope); + parallelExecutor = MetricUtils.instrumentedExecutorService(parallelExecutor, manager.registry(registryName), + SolrMetricManager.mkName("parallelCoreAdminExecutor", getCategory().name(),scope, "threadPool")); + return metrics; + } + /** * The instance of CoreContainer this handler handles. This should be the CoreContainer instance that created this * handler. diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index 3c01720c6bb..d190ce03f1f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -42,6 +42,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.stats.InstrumentedHttpRequestExecutor; import org.apache.solr.util.stats.InstrumentedPoolingHttpClientConnectionManager; +import org.apache.solr.util.stats.MetricUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,8 +59,8 @@ import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -73,7 +74,7 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. // // Consider CallerRuns policy and a lower max threads to throttle // requests at some point (or should we simply return failure?) - private ThreadPoolExecutor commExecutor = new ExecutorUtil.MDCAwareThreadPoolExecutor( + private ExecutorService commExecutor = new ExecutorUtil.MDCAwareThreadPoolExecutor( 0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS, // terminate idle threads after 5 sec @@ -191,7 +192,7 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. return clientParams; } - protected ThreadPoolExecutor getThreadPoolExecutor(){ + protected ExecutorService getThreadPoolExecutor(){ return this.commExecutor; } @@ -378,6 +379,9 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. 
List metricNames = new ArrayList<>(4); metricNames.addAll(clientConnectionManager.initializeMetrics(manager, registry, scope)); metricNames.addAll(httpRequestExecutor.initializeMetrics(manager, registry, scope)); + commExecutor = MetricUtils.instrumentedExecutorService(commExecutor, + manager.registry(registry), + SolrMetricManager.mkName("httpShardExecutor", scope, "threadPool")); return metricNames; } diff --git a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java index c3ed8cd14e7..9d230bcd57d 100644 --- a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java +++ b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.List; import java.util.concurrent.ExecutorService; +import com.codahale.metrics.InstrumentedExecutorService; import org.apache.http.client.HttpClient; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -102,12 +103,18 @@ public class UpdateShardHandler implements SolrMetricProducer { List metricNames = new ArrayList<>(4); metricNames.addAll(clientConnectionManager.initializeMetrics(manager, registry, scope)); metricNames.addAll(httpRequestExecutor.initializeMetrics(manager, registry, scope)); + updateExecutor = new InstrumentedExecutorService(updateExecutor, + manager.registry(registry), + SolrMetricManager.mkName("updateExecutor", scope, "threadPool")); + recoveryExecutor = new InstrumentedExecutorService(recoveryExecutor, + manager.registry(registry), + SolrMetricManager.mkName("recoveryExecutor", scope, "threadPool")); return metricNames; } @Override public String getDescription() { - return "Metrics tracked by UpdateShardHandler for "; + return "Metrics tracked by UpdateShardHandler related to distributed updates and recovery"; } @Override diff --git a/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java b/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java index 62f57763fcf..af5a0b5ff42 100644 --- a/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java +++ b/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java @@ -19,11 +19,13 @@ package org.apache.solr.util.stats; import java.util.List; import java.util.Map; import java.util.SortedSet; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; import com.codahale.metrics.Histogram; +import com.codahale.metrics.InstrumentedExecutorService; import com.codahale.metrics.Meter; import com.codahale.metrics.Metric; import com.codahale.metrics.MetricFilter; @@ -141,4 +143,11 @@ public class MetricUtils { response.add("requests", counter.getCount()); return response; } + + /** + * Returns an instrumented wrapper over the given executor service. 
+ */ + public static ExecutorService instrumentedExecutorService(ExecutorService delegate, MetricRegistry metricRegistry, String scope) { + return new InstrumentedExecutorService(delegate, metricRegistry, scope); + } } From 018df31da8b6b5beeb767c90d7ef2a784eca354a Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 3 Jan 2017 05:31:56 -0500 Subject: [PATCH 78/83] LUCENE-7603: add package-info.java for new package --- .../lucene/util/graph/package-info.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 lucene/core/src/java/org/apache/lucene/util/graph/package-info.java diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/package-info.java b/lucene/core/src/java/org/apache/lucene/util/graph/package-info.java new file mode 100644 index 00000000000..bbc9b6a6ed9 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/graph/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utility classes for working with token streams as graphs. 
+ */ +package org.apache.lucene.util.graph; From 7b2e3db5531d42d91c2718737c63c2ce4d873c8e Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 3 Jan 2017 05:46:11 -0500 Subject: [PATCH 79/83] don't allow position length < 1 --- .../tokenattributes/PackedTokenAttributeImpl.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PackedTokenAttributeImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PackedTokenAttributeImpl.java index aaa3316b576..c89a37420ca 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PackedTokenAttributeImpl.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PackedTokenAttributeImpl.java @@ -46,9 +46,9 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl */ @Override public void setPositionIncrement(int positionIncrement) { - if (positionIncrement < 0) - throw new IllegalArgumentException - ("Increment must be zero or greater: " + positionIncrement); + if (positionIncrement < 0) { + throw new IllegalArgumentException("Increment must be zero or greater: " + positionIncrement); + } this.positionIncrement = positionIncrement; } @@ -67,6 +67,9 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl */ @Override public void setPositionLength(int positionLength) { + if (positionLength < 1) { + throw new IllegalArgumentException("Position length must be 1 or greater: got " + positionLength); + } this.positionLength = positionLength; } From 48ca9fc3f4f8d95293cee7bb59eff61247ede181 Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Tue, 3 Jan 2017 11:11:50 +0100 Subject: [PATCH 80/83] SOLR-9854: Collect metrics for index merges and index store IO. (squashed) --- solr/CHANGES.txt | 2 + .../solr/core/MetricsDirectoryFactory.java | 511 ++++++++++++++++++ .../java/org/apache/solr/core/SolrCore.java | 24 +- .../apache/solr/core/SolrDeletionPolicy.java | 6 + .../org/apache/solr/core/SolrInfoMBean.java | 2 +- .../solr/core/StandardDirectoryFactory.java | 2 +- .../apache/solr/update/SolrIndexConfig.java | 16 +- .../apache/solr/update/SolrIndexWriter.java | 144 +++++ .../apache/solr/util/stats/MetricUtils.java | 79 ++- .../conf/solrconfig-indexmetrics.xml | 57 ++ .../test/org/apache/solr/core/TestConfig.java | 2 + .../solr/handler/TestReplicationHandler.java | 13 +- .../CoreMergeIndexesAdminHandlerTest.java | 10 +- .../handler/admin/MetricsHandlerTest.java | 6 +- .../solr/update/SolrIndexConfigTest.java | 1 + .../solr/update/SolrIndexMetricsTest.java | 94 ++++ .../solr/util/stats/MetricUtilsTest.java | 24 +- 17 files changed, 945 insertions(+), 48 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/core/MetricsDirectoryFactory.java create mode 100644 solr/core/src/test-files/solr/collection1/conf/solrconfig-indexmetrics.xml create mode 100644 solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8609f9139bc..afcd295db73 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -212,6 +212,8 @@ New Features * SOLR-9896: Instrument and collect metrics from query, update, core admin and core load thread pools. 
(shalin) +* SOLR-9854: Collect metrics for index merges and index store IO (ab) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/core/MetricsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/MetricsDirectoryFactory.java new file mode 100644 index 00000000000..62e82ac88db --- /dev/null +++ b/solr/core/src/java/org/apache/solr/core/MetricsDirectoryFactory.java @@ -0,0 +1,511 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.core; + +import java.io.IOException; +import java.util.Collection; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.LockFactory; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.util.plugin.SolrCoreAware; + +/** + * An implementation of {@link DirectoryFactory} that decorates provided factory by + * adding metrics for directory IO operations. + */ +public class MetricsDirectoryFactory extends DirectoryFactory implements SolrCoreAware { + private final SolrMetricManager metricManager; + private final String registry; + private final DirectoryFactory in; + private boolean directoryDetails = false; + + public MetricsDirectoryFactory(SolrMetricManager metricManager, String registry, DirectoryFactory in) { + this.metricManager = metricManager; + this.registry = registry; + this.in = in; + } + + public DirectoryFactory getDelegate() { + return in; + } + + /** + * Currently the following arguments are supported: + *
* <ul>
+ *   <li>directoryDetails - (optional bool) when true then additional detailed metrics + * will be collected. These include eg. IO size histograms and per-file counters and histograms</li>
+ * </ul>
+ *
    + * @param args init args + */ + @Override + public void init(NamedList args) { + // should be already inited + // in.init(args); + if (args == null) { + return; + } + Boolean dd = args.getBooleanArg("directoryDetails"); + if (dd != null) { + directoryDetails = dd; + } else { + directoryDetails = false; + } + } + + @Override + public void doneWithDirectory(Directory dir) throws IOException { + // unwrap + if (dir instanceof MetricsDirectory) { + dir = ((MetricsDirectory)dir).getDelegate(); + } + in.doneWithDirectory(dir); + } + + @Override + public void addCloseListener(Directory dir, CachingDirectoryFactory.CloseListener closeListener) { + // unwrap + if (dir instanceof MetricsDirectory) { + dir = ((MetricsDirectory)dir).getDelegate(); + } + in.addCloseListener(dir, closeListener); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + protected Directory create(String path, LockFactory lockFactory, DirContext dirContext) throws IOException { + Directory dir = in.create(path, lockFactory, dirContext); + return new MetricsDirectory(metricManager, registry, dir, directoryDetails); + } + + @Override + protected LockFactory createLockFactory(String rawLockType) throws IOException { + return in.createLockFactory(rawLockType); + } + + @Override + public boolean exists(String path) throws IOException { + return in.exists(path); + } + + @Override + public void remove(Directory dir) throws IOException { + // unwrap + if (dir instanceof MetricsDirectory) { + dir = ((MetricsDirectory)dir).getDelegate(); + } + in.remove(dir); + } + + @Override + public void remove(Directory dir, boolean afterCoreClose) throws IOException { + // unwrap + if (dir instanceof MetricsDirectory) { + dir = ((MetricsDirectory)dir).getDelegate(); + } + in.remove(dir, afterCoreClose); + } + + @Override + public boolean isSharedStorage() { + return in.isSharedStorage(); + } + + @Override + public boolean isAbsolute(String path) { + return in.isAbsolute(path); + } + + @Override + public boolean searchersReserveCommitPoints() { + return in.searchersReserveCommitPoints(); + } + + @Override + public String getDataHome(CoreDescriptor cd) throws IOException { + return in.getDataHome(cd); + } + + @Override + public long size(Directory directory) throws IOException { + return in.size(directory); + } + + @Override + public long size(String path) throws IOException { + return in.size(path); + } + + @Override + public Collection offerMBeans() { + return in.offerMBeans(); + } + + @Override + public void cleanupOldIndexDirectories(String dataDirPath, String currentIndexDirPath) { + in.cleanupOldIndexDirectories(dataDirPath, currentIndexDirPath); + } + + @Override + public void remove(String path, boolean afterCoreClose) throws IOException { + in.remove(path, afterCoreClose); + } + + @Override + public void remove(String path) throws IOException { + in.remove(path); + } + + @Override + public void move(Directory fromDir, Directory toDir, String fileName, IOContext ioContext) throws IOException { + in.move(fromDir, toDir, fileName, ioContext); + } + + @Override + public Directory get(String path, DirContext dirContext, String rawLockType) throws IOException { + Directory dir = in.get(path, dirContext, rawLockType); + if (dir instanceof MetricsDirectory) { + return dir; + } else { + return new MetricsDirectory(metricManager, registry, dir, directoryDetails); + } + } + + @Override + public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException { + 
super.renameWithOverwrite(dir, fileName, toName); + } + + @Override + public String normalize(String path) throws IOException { + return in.normalize(path); + } + + @Override + protected boolean deleteOldIndexDirectory(String oldDirPath) throws IOException { + return in.deleteOldIndexDirectory(oldDirPath); + } + + @Override + public void initCoreContainer(CoreContainer cc) { + in.initCoreContainer(cc); + } + + @Override + protected Directory getBaseDir(Directory dir) { + return in.getBaseDir(dir); + } + + @Override + public void incRef(Directory dir) { + // unwrap + if (dir instanceof MetricsDirectory) { + dir = ((MetricsDirectory)dir).getDelegate(); + } + in.incRef(dir); + } + + @Override + public boolean isPersistent() { + return in.isPersistent(); + } + + @Override + public void inform(SolrCore core) { + if (in instanceof SolrCoreAware) { + ((SolrCoreAware)in).inform(core); + } + } + + @Override + public void release(Directory dir) throws IOException { + // unwrap + if (dir instanceof MetricsDirectory) { + dir = ((MetricsDirectory)dir).getDelegate(); + } + in.release(dir); + } + + + + private static final String SEGMENTS = "segments"; + private static final String SEGMENTS_PREFIX = "segments_"; + private static final String PENDING_SEGMENTS_PREFIX = "pending_segments_"; + private static final String TEMP = "temp"; + private static final String OTHER = "other"; + + public static class MetricsDirectory extends FilterDirectory { + + private final Directory in; + private final String registry; + private final SolrMetricManager metricManager; + private final Meter totalReads; + private final Histogram totalReadSizes; + private final Meter totalWrites; + private final Histogram totalWriteSizes; + private final boolean directoryDetails; + + private final String PREFIX = SolrInfoMBean.Category.DIRECTORY.toString() + "."; + + public MetricsDirectory(SolrMetricManager metricManager, String registry, Directory in, boolean directoryDetails) throws IOException { + super(in); + this.metricManager = metricManager; + this.registry = registry; + this.in = in; + this.directoryDetails = directoryDetails; + this.totalReads = metricManager.meter(registry, "reads", SolrInfoMBean.Category.DIRECTORY.toString(), "total"); + this.totalWrites = metricManager.meter(registry, "writes", SolrInfoMBean.Category.DIRECTORY.toString(), "total"); + if (directoryDetails) { + this.totalReadSizes = metricManager.histogram(registry, "readSizes", SolrInfoMBean.Category.DIRECTORY.toString(), "total"); + this.totalWriteSizes = metricManager.histogram(registry, "writeSizes", SolrInfoMBean.Category.DIRECTORY.toString(), "total"); + } else { + this.totalReadSizes = null; + this.totalWriteSizes = null; + } + } + + private String getMetricName(String name, boolean output) { + if (!directoryDetails) { + return null; + } + String lastName; + if (name.startsWith(SEGMENTS_PREFIX) || name.startsWith(PENDING_SEGMENTS_PREFIX)) { + lastName = SEGMENTS; + } else { + int pos = name.lastIndexOf('.'); + if (pos != -1 && name.length() > pos + 1) { + lastName = name.substring(pos + 1); + } else { + lastName = OTHER; + } + } + StringBuilder sb = new StringBuilder(PREFIX); + sb.append(lastName); + sb.append('.'); + if (output) { + sb.append("write"); + } else { + sb.append("read"); + } + return sb.toString(); + } + + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + IndexOutput output = in.createOutput(name, context); + if (output != null) { + return new MetricsOutput(totalWrites, totalWriteSizes, 
metricManager, registry, getMetricName(name, true), output); + } else { + return null; + } + } + + @Override + public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException { + IndexOutput output = in.createTempOutput(prefix, suffix, context); + if (output != null) { + return new MetricsOutput(totalWrites, totalWriteSizes, metricManager, registry, getMetricName(TEMP, true), output); + } else { + return null; + } + } + + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + IndexInput input = in.openInput(name, context); + if (input != null) { + return new MetricsInput(totalReads, totalReadSizes, metricManager, registry, getMetricName(name, false), input); + } else { + return null; + } + } + } + + public static class MetricsOutput extends IndexOutput { + private final IndexOutput in; + private final Histogram histogram; + private final Meter meter; + private final Meter totalMeter; + private final Histogram totalHistogram; + private final boolean withDetails; + + public MetricsOutput(Meter totalMeter, Histogram totalHistogram, SolrMetricManager metricManager, + String registry, String metricName, IndexOutput in) { + super(in.toString(), in.getName()); + this.in = in; + this.totalMeter = totalMeter; + this.totalHistogram = totalHistogram; + if (metricName != null && totalHistogram != null) { + withDetails = true; + String histName = metricName + "Sizes"; + String meterName = metricName + "s"; + this.histogram = metricManager.histogram(registry, histName); + this.meter = metricManager.meter(registry, meterName); + } else { + withDetails = false; + this.histogram = null; + this.meter = null; + } + } + + @Override + public void writeByte(byte b) throws IOException { + in.writeByte(b); + totalMeter.mark(); + if (withDetails) { + totalHistogram.update(1); + meter.mark(); + histogram.update(1); + } + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + in.writeBytes(b, offset, length); + totalMeter.mark(length); + if (withDetails) { + totalHistogram.update(length); + meter.mark(length); + histogram.update(length); + } + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long getFilePointer() { + return in.getFilePointer(); + } + + @Override + public long getChecksum() throws IOException { + return in.getChecksum(); + } + } + + public static class MetricsInput extends IndexInput { + private final IndexInput in; + private final Meter totalMeter; + private final Histogram totalHistogram; + private final Histogram histogram; + private final Meter meter; + private final boolean withDetails; + + public MetricsInput(Meter totalMeter, Histogram totalHistogram, SolrMetricManager metricManager, String registry, String metricName, IndexInput in) { + super(in.toString()); + this.in = in; + this.totalMeter = totalMeter; + this.totalHistogram = totalHistogram; + if (metricName != null && totalHistogram != null) { + withDetails = true; + String histName = metricName + "Sizes"; + String meterName = metricName + "s"; + this.histogram = metricManager.histogram(registry, histName); + this.meter = metricManager.meter(registry, meterName); + } else { + withDetails = false; + this.histogram = null; + this.meter = null; + } + } + + public MetricsInput(Meter totalMeter, Histogram totalHistogram, Histogram histogram, Meter meter, IndexInput in) { + super(in.toString()); + this.in = in; + this.totalMeter = totalMeter; + this.totalHistogram = 
totalHistogram; + this.histogram = histogram; + this.meter = meter; + if (totalHistogram != null && meter != null && histogram != null) { + withDetails = true; + } else { + withDetails = false; + } + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long getFilePointer() { + return in.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + in.seek(pos); + } + + @Override + public long length() { + return in.length(); + } + + @Override + public IndexInput clone() { + return new MetricsInput(totalMeter, totalHistogram, histogram, meter, in.clone()); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + IndexInput slice = in.slice(sliceDescription, offset, length); + if (slice != null) { + return new MetricsInput(totalMeter, totalHistogram, histogram, meter, slice); + } else { + return null; + } + } + + @Override + public byte readByte() throws IOException { + totalMeter.mark(); + if (withDetails) { + totalHistogram.update(1); + meter.mark(); + histogram.update(1); + } + return in.readByte(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + totalMeter.mark(len); + if (withDetails) { + totalHistogram.update(len); + meter.mark(len); + histogram.update(len); + } + in.readBytes(b, offset, len); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index a8d7738ea06..a9fec5ad142 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -642,7 +642,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable { dirFactory = new NRTCachingDirectoryFactory(); dirFactory.initCoreContainer(getCoreDescriptor().getCoreContainer()); } - return dirFactory; + if (solrConfig.indexConfig.metricsInfo != null && solrConfig.indexConfig.metricsInfo.isEnabled()) { + final DirectoryFactory factory = new MetricsDirectoryFactory(coreDescriptor.getCoreContainer().getMetricManager(), + coreMetricManager.getRegistryName(), dirFactory); + factory.init(solrConfig.indexConfig.metricsInfo.initArgs); + return factory; + } else { + return dirFactory; + } } private void initIndexReaderFactory() { @@ -846,6 +853,8 @@ public final class SolrCore implements SolrInfoMBean, Closeable { resourceLoader = config.getResourceLoader(); this.solrConfig = config; this.configSetProperties = configSetProperties; + // Initialize the metrics manager + this.coreMetricManager = initCoreMetricManager(config); if (updateHandler == null) { directoryFactory = initDirectoryFactory(); @@ -863,17 +872,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable { checkVersionFieldExistsInSchema(schema, coreDescriptor); - // Initialize the metrics manager - this.coreMetricManager = initCoreMetricManager(config); - SolrMetricManager metricManager = this.coreDescriptor.getCoreContainer().getMetricManager(); // initialize searcher-related metrics - newSearcherCounter = metricManager.counter(coreMetricManager.getRegistryName(), "newSearcher"); - newSearcherTimer = metricManager.timer(coreMetricManager.getRegistryName(), "newSearcherTime"); - newSearcherWarmupTimer = metricManager.timer(coreMetricManager.getRegistryName(), "newSearcherWarmup"); - newSearcherMaxReachedCounter = metricManager.counter(coreMetricManager.getRegistryName(), "newSearcherMaxReached"); - newSearcherOtherErrorsCounter = 
metricManager.counter(coreMetricManager.getRegistryName(), "newSearcherErrors"); + newSearcherCounter = metricManager.counter(coreMetricManager.getRegistryName(), "new", Category.SEARCHER.toString()); + newSearcherTimer = metricManager.timer(coreMetricManager.getRegistryName(), "time", Category.SEARCHER.toString(), "new"); + newSearcherWarmupTimer = metricManager.timer(coreMetricManager.getRegistryName(), "warmup", Category.SEARCHER.toString(), "new"); + newSearcherMaxReachedCounter = metricManager.counter(coreMetricManager.getRegistryName(), "maxReached", Category.SEARCHER.toString(), "new"); + newSearcherOtherErrorsCounter = metricManager.counter(coreMetricManager.getRegistryName(), "errors", Category.SEARCHER.toString(), "new"); // Initialize JMX this.infoRegistry = initInfoRegistry(name, config); diff --git a/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java b/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java index 34482cd14d4..eba29646974 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java +++ b/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java @@ -114,6 +114,9 @@ public class SolrDeletionPolicy extends IndexDeletionPolicy implements NamedList protected void appendDetails(StringBuilder sb, IndexCommit c) { Directory dir = c.getDirectory(); + if (dir instanceof MetricsDirectoryFactory.MetricsDirectory) { // unwrap + dir = ((MetricsDirectoryFactory.MetricsDirectory) dir).getDelegate(); + } if (dir instanceof FSDirectory) { FSDirectory fsd = (FSDirectory) dir; sb.append("dir=").append(fsd.getDirectory()); @@ -194,6 +197,9 @@ public class SolrDeletionPolicy extends IndexDeletionPolicy implements NamedList private String getId(IndexCommit commit) { StringBuilder sb = new StringBuilder(); Directory dir = commit.getDirectory(); + if (dir instanceof MetricsDirectoryFactory.MetricsDirectory) { // unwrap + dir = ((MetricsDirectoryFactory.MetricsDirectory) dir).getDelegate(); + } // For anything persistent, make something that will // be the same, regardless of the Directory instance. diff --git a/solr/core/src/java/org/apache/solr/core/SolrInfoMBean.java b/solr/core/src/java/org/apache/solr/core/SolrInfoMBean.java index c5fb84b0db5..c64af474961 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrInfoMBean.java +++ b/solr/core/src/java/org/apache/solr/core/SolrInfoMBean.java @@ -32,7 +32,7 @@ public interface SolrInfoMBean { /** * Category of {@link SolrCore} component. */ - enum Category { CORE, QUERYHANDLER, UPDATEHANDLER, CACHE, HIGHLIGHTING, QUERYPARSER, OTHER } + enum Category { CORE, QUERYHANDLER, UPDATEHANDLER, CACHE, HIGHLIGHTING, QUERYPARSER, SEARCHER, INDEX, DIRECTORY, OTHER } /** * Top-level group of beans for a subsystem. 
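The SolrDeletionPolicy hunks above repeat the same unwrap step wherever a Directory may have been wrapped by the new MetricsDirectory. A minimal sketch of that pattern as a standalone helper, for illustration only (the helper class and method names are not part of this patch; getDelegate() is the accessor inherited from Lucene's FilterDirectory and used by the patch itself):

import org.apache.lucene.store.Directory;
import org.apache.solr.core.MetricsDirectoryFactory.MetricsDirectory;

final class MetricsDirectoryUnwrapper {
  // Return the wrapped Directory when metrics instrumentation is in place,
  // otherwise return the instance unchanged, matching the inline checks above.
  static Directory unwrap(Directory dir) {
    return (dir instanceof MetricsDirectory) ? ((MetricsDirectory) dir).getDelegate() : dir;
  }
}
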
diff --git a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java index 1d8793afc68..37c15edb66b 100644 --- a/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java @@ -142,7 +142,7 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory { super.move(fromDir, toDir, fileName, ioContext); } - + // perform an atomic rename if possible public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException { Directory baseDir = getBaseDir(dir); diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java index 1a9801f9002..d484e85b13e 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -18,6 +18,7 @@ package org.apache.solr.update; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -78,6 +79,7 @@ public class SolrIndexConfig implements MapSerializable { public final PluginInfo mergePolicyInfo; public final PluginInfo mergePolicyFactoryInfo; public final PluginInfo mergeSchedulerInfo; + public final PluginInfo metricsInfo; public final PluginInfo mergedSegmentWarmerInfo; @@ -99,6 +101,8 @@ public class SolrIndexConfig implements MapSerializable { mergePolicyFactoryInfo = null; mergeSchedulerInfo = null; mergedSegmentWarmerInfo = null; + // enable coarse-grained metrics by default + metricsInfo = new PluginInfo("metrics", Collections.emptyMap(), null, null); } /** @@ -144,6 +148,12 @@ public class SolrIndexConfig implements MapSerializable { writeLockTimeout=solrConfig.getInt(prefix+"/writeLockTimeout", def.writeLockTimeout); lockType=solrConfig.get(prefix+"/lockType", def.lockType); + List infos = solrConfig.readPluginInfos(prefix + "/metrics", false, false); + if (infos.isEmpty()) { + metricsInfo = def.metricsInfo; + } else { + metricsInfo = infos.get(0); + } mergeSchedulerInfo = getPluginInfo(prefix + "/mergeScheduler", solrConfig, def.mergeSchedulerInfo); mergePolicyInfo = getPluginInfo(prefix + "/mergePolicy", solrConfig, def.mergePolicyInfo); mergePolicyFactoryInfo = getPluginInfo(prefix + "/mergePolicyFactory", solrConfig, def.mergePolicyFactoryInfo); @@ -197,6 +207,9 @@ public class SolrIndexConfig implements MapSerializable { "lockType", lockType, "infoStreamEnabled", infoStream != InfoStream.NO_OUTPUT); if(mergeSchedulerInfo != null) m.put("mergeScheduler",mergeSchedulerInfo); + if (metricsInfo != null) { + m.put("metrics", metricsInfo); + } if (mergePolicyInfo != null) { m.put("mergePolicy", mergePolicyInfo); } else if (mergePolicyFactoryInfo != null) { @@ -237,7 +250,8 @@ public class SolrIndexConfig implements MapSerializable { iwc.setSimilarity(schema.getSimilarity()); MergePolicy mergePolicy = buildMergePolicy(schema); iwc.setMergePolicy(mergePolicy); - iwc.setMergeScheduler(buildMergeScheduler(schema)); + MergeScheduler mergeScheduler = buildMergeScheduler(schema); + iwc.setMergeScheduler(mergeScheduler); iwc.setInfoStream(infoStream); if (mergePolicy instanceof SortingMergePolicy) { diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java index d75214aeca7..626bc8eeb90 100644 --- 
a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java @@ -20,12 +20,19 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Meter; +import com.codahale.metrics.Timer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.IndexDeletionPolicy; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.util.InfoStream; import org.apache.solr.common.util.IOUtils; @@ -33,6 +40,8 @@ import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrInfoMBean; +import org.apache.solr.metrics.SolrMetricManager; import org.apache.solr.schema.IndexSchema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,6 +69,28 @@ public class SolrIndexWriter extends IndexWriter { private InfoStream infoStream; private Directory directory; + // metrics + private long majorMergeDocs = 512 * 1024; + private final Timer majorMerge; + private final Timer minorMerge; + private final Meter majorMergedDocs; + private final Meter majorDeletedDocs; + private final Counter mergeErrors; + private final Meter flushMeter; // original counter is package-private in IndexWriter + private final boolean mergeDetails; + private final AtomicInteger runningMajorMerges = new AtomicInteger(); + private final Gauge runningMajorMergesGauge; + private final AtomicInteger runningMinorMerges = new AtomicInteger(); + private final Gauge runningMinorMergesGauge; + private final AtomicInteger runningMajorMergesSegments = new AtomicInteger(); + private final Gauge runningMajorMergesSegmentsGauge; + private final AtomicInteger runningMinorMergesSegments = new AtomicInteger(); + private final Gauge runningMinorMergesSegmentsGauge; + private final AtomicLong runningMajorMergesDocs = new AtomicLong(); + private final Gauge runningMajorMergesDocsGauge; + private final AtomicLong runningMinorMergesDocs = new AtomicLong(); + private final Gauge runningMinorMergesDocsGauge; + public static SolrIndexWriter create(SolrCore core, String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException { SolrIndexWriter w = null; @@ -84,6 +115,20 @@ public class SolrIndexWriter extends IndexWriter { this.directory = d; numOpens.incrementAndGet(); log.debug("Opened Writer " + name); + // no metrics + minorMerge = null; + majorMerge = null; + mergeErrors = null; + majorMergedDocs = null; + majorDeletedDocs = null; + runningMinorMergesGauge = null; + runningMinorMergesDocsGauge = null; + runningMinorMergesSegmentsGauge = null; + runningMajorMergesGauge = null; + runningMajorMergesDocsGauge = null; + runningMajorMergesSegmentsGauge = null; + flushMeter = null; + mergeDetails = false; } private SolrIndexWriter(SolrCore core, String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, 
IndexDeletionPolicy delPolicy, Codec codec) throws IOException { @@ -97,6 +142,51 @@ public class SolrIndexWriter extends IndexWriter { infoStream = getConfig().getInfoStream(); this.directory = directory; numOpens.incrementAndGet(); + SolrMetricManager metricManager = core.getCoreDescriptor().getCoreContainer().getMetricManager(); + String registry = core.getCoreMetricManager().getRegistryName(); + minorMerge = metricManager.timer(registry, "minor", SolrInfoMBean.Category.INDEX.toString(), "merge"); + majorMerge = metricManager.timer(registry, "major", SolrInfoMBean.Category.INDEX.toString(), "merge"); + mergeErrors = metricManager.counter(registry, "errors", SolrInfoMBean.Category.INDEX.toString(), "merge"); + runningMajorMergesGauge = () -> runningMajorMerges.get(); + runningMinorMergesGauge = () -> runningMinorMerges.get(); + runningMajorMergesDocsGauge = () -> runningMajorMergesDocs.get(); + runningMinorMergesDocsGauge = () -> runningMinorMergesDocs.get(); + runningMajorMergesSegmentsGauge = () -> runningMajorMergesSegments.get(); + runningMinorMergesSegmentsGauge = () -> runningMinorMergesSegments.get(); + metricManager.register(registry, runningMajorMergesGauge, true, "running", SolrInfoMBean.Category.INDEX.toString(), "merge", "major"); + metricManager.register(registry, runningMinorMergesGauge, true, "running", SolrInfoMBean.Category.INDEX.toString(), "merge", "minor"); + metricManager.register(registry, runningMajorMergesDocsGauge, true, "running.docs", SolrInfoMBean.Category.INDEX.toString(), "merge", "major"); + metricManager.register(registry, runningMinorMergesDocsGauge, true, "running.docs", SolrInfoMBean.Category.INDEX.toString(), "merge", "minor"); + metricManager.register(registry, runningMajorMergesSegmentsGauge, true, "running.segments", SolrInfoMBean.Category.INDEX.toString(), "merge", "major"); + metricManager.register(registry, runningMinorMergesSegmentsGauge, true, "running.segments", SolrInfoMBean.Category.INDEX.toString(), "merge", "minor"); + flushMeter = metricManager.meter(registry, "flush", SolrInfoMBean.Category.INDEX.toString()); + if (config.metricsInfo != null && config.metricsInfo.initArgs != null) { + Object v = config.metricsInfo.initArgs.get("majorMergeDocs"); + if (v != null) { + try { + majorMergeDocs = Long.parseLong(String.valueOf(v)); + } catch (Exception e) { + log.warn("Invalid 'majorMergeDocs' argument, using default 512k", e); + } + } + Boolean Details = config.metricsInfo.initArgs.getBooleanArg("mergeDetails"); + if (Details != null) { + mergeDetails = Details; + } else { + mergeDetails = false; + } + if (mergeDetails) { + majorMergedDocs = metricManager.meter(registry, "docs", SolrInfoMBean.Category.INDEX.toString(), "merge", "major"); + majorDeletedDocs = metricManager.meter(registry, "deletedDocs", SolrInfoMBean.Category.INDEX.toString(), "merge", "major"); + } else { + majorMergedDocs = null; + majorDeletedDocs = null; + } + } else { + mergeDetails = false; + majorMergedDocs = null; + majorDeletedDocs = null; + } } @SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " + @@ -112,6 +202,60 @@ public class SolrIndexWriter extends IndexWriter { this.directoryFactory = factory; } + // we override this method to collect metrics for merges. 
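The constructor above wires Dropwizard timers, meters, counters and gauges into Solr's metric manager. The following stand-alone sketch shows the same registration pattern with a plain MetricRegistry standing in for the registry obtained from SolrMetricManager; the class name and metric names are illustrative only, chosen to echo the keys registered by the patch.

```java
import java.util.concurrent.atomic.AtomicInteger;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;

/** Sketch of the gauge/timer registration pattern used by SolrIndexWriter's constructor. */
public class MergeMetricsSketch {
  private final AtomicInteger runningMinorMerges = new AtomicInteger();

  public static void main(String[] args) {
    MergeMetricsSketch sketch = new MergeMetricsSketch();
    // A plain registry stands in for the core registry resolved via SolrMetricManager.
    MetricRegistry registry = new MetricRegistry();
    Timer minorMerge = registry.timer("INDEX.merge.minor");
    Counter mergeErrors = registry.counter("INDEX.merge.errors");
    Meter flushMeter = registry.meter("INDEX.flush");
    // Gauges are read-only views over mutable state that the writer updates during merges.
    registry.register("INDEX.merge.minor.running",
        (Gauge<Integer>) () -> sketch.runningMinorMerges.get());
    System.out.println(registry.getMetrics().keySet());
  }
}
```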
+ @Override + public void merge(MergePolicy.OneMerge merge) throws IOException { + long deletedDocs = 0; + long totalNumDocs = merge.totalNumDocs(); + for (SegmentCommitInfo info : merge.segments) { + totalNumDocs -= info.getDelCount(); + deletedDocs += info.getDelCount(); + } + boolean major = totalNumDocs > majorMergeDocs; + int segmentsCount = merge.segments.size(); + Timer.Context context; + if (major) { + runningMajorMerges.incrementAndGet(); + runningMajorMergesDocs.addAndGet(totalNumDocs); + runningMajorMergesSegments.addAndGet(segmentsCount); + if (mergeDetails) { + majorMergedDocs.mark(totalNumDocs); + majorDeletedDocs.mark(deletedDocs); + } + context = majorMerge.time(); + } else { + runningMinorMerges.incrementAndGet(); + runningMinorMergesDocs.addAndGet(totalNumDocs); + runningMinorMergesSegments.addAndGet(segmentsCount); + context = minorMerge.time(); + } + try { + super.merge(merge); + } catch (Throwable t) { + mergeErrors.inc(); + throw t; + } finally { + context.stop(); + if (major) { + runningMajorMerges.decrementAndGet(); + runningMajorMergesDocs.addAndGet(-totalNumDocs); + runningMajorMergesSegments.addAndGet(-segmentsCount); + } else { + runningMinorMerges.decrementAndGet(); + runningMinorMergesDocs.addAndGet(-totalNumDocs); + runningMinorMergesSegments.addAndGet(-segmentsCount); + } + } + } + + @Override + protected void doAfterFlush() throws IOException { + if (flushMeter != null) { // this is null when writer is used only for snapshot cleanup + flushMeter.mark(); + } + super.doAfterFlush(); + } + /** * use DocumentBuilder now... * private final void addField(Document doc, String name, String val) { diff --git a/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java b/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java index af5a0b5ff42..4a83c869448 100644 --- a/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java +++ b/solr/core/src/java/org/apache/solr/util/stats/MetricUtils.java @@ -40,7 +40,7 @@ import org.apache.solr.common.util.NamedList; public class MetricUtils { /** - * Adds metrics from a Timer to a NamedList, using well-known names. + * Adds metrics from a Timer to a NamedList, using well-known back-compat names. * @param lst The NamedList to add the metrics data to * @param timer The Timer to extract the metrics from */ @@ -68,7 +68,7 @@ public class MetricUtils { } /** - * Returns a NamedList respresentation of the given metric registry. Only those metrics + * Returns a NamedList representation of the given metric registry. Only those metrics * are converted to NamedList which match at least one of the given MetricFilter instances. 
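The overridden merge() above follows a single pattern: start a Timer context, run the merge, count any failure, and always stop the timer and roll back the "running" gauges. A minimal sketch of that time-and-count pattern, with a Runnable standing in for super.merge() and hypothetical metric names:

```java
import com.codahale.metrics.Counter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;

/** Sketch of the "time the operation, count failures" pattern from the merge() override. */
public class TimedOperationSketch {
  private final MetricRegistry registry = new MetricRegistry();
  private final Timer minorMerge = registry.timer("INDEX.merge.minor");
  private final Counter mergeErrors = registry.counter("INDEX.merge.errors");

  void runTimed(Runnable operation) {
    Timer.Context context = minorMerge.time();
    try {
      operation.run();              // stands in for super.merge(merge)
    } catch (Throwable t) {
      mergeErrors.inc();            // failures are recorded before rethrowing
      throw t;
    } finally {
      context.stop();               // elapsed time is recorded whether or not the operation failed
    }
  }
}
```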
* * @param registry the {@link MetricRegistry} to be converted to NamedList @@ -104,31 +104,74 @@ public class MetricUtils { static NamedList histogramToNamedList(Histogram histogram) { NamedList response = new NamedList(); Snapshot snapshot = histogram.getSnapshot(); - response.add("requests", histogram.getCount()); - response.add("minTime", nsToMs(snapshot.getMin())); - response.add("maxTime", nsToMs(snapshot.getMax())); - response.add("avgTimePerRequest", nsToMs(snapshot.getMean())); - response.add("medianRequestTime", nsToMs(snapshot.getMedian())); - response.add("75thPcRequestTime", nsToMs(snapshot.get75thPercentile())); - response.add("95thPcRequestTime", nsToMs(snapshot.get95thPercentile())); - response.add("99thPcRequestTime", nsToMs(snapshot.get99thPercentile())); - response.add("999thPcRequestTime", nsToMs(snapshot.get999thPercentile())); + response.add("count", histogram.getCount()); + // non-time based values + addSnapshot(response, snapshot, false); return response; } + // optionally convert ns to ms + static double nsToMs(boolean convert, double value) { + if (convert) { + return nsToMs(value); + } else { + return value; + } + } + + static final String MS = "_ms"; + + static final String MIN = "min"; + static final String MIN_MS = MIN + MS; + static final String MAX = "max"; + static final String MAX_MS = MAX + MS; + static final String MEAN = "mean"; + static final String MEAN_MS = MEAN + MS; + static final String MEDIAN = "median"; + static final String MEDIAN_MS = MEDIAN + MS; + static final String STDDEV = "stddev"; + static final String STDDEV_MS = STDDEV + MS; + static final String P75 = "p75"; + static final String P75_MS = P75 + MS; + static final String P95 = "p95"; + static final String P95_MS = P95 + MS; + static final String P99 = "p99"; + static final String P99_MS = P99 + MS; + static final String P999 = "p999"; + static final String P999_MS = P999 + MS; + + // some snapshots represent time in ns, other snapshots represent raw values (eg. chunk size) + static void addSnapshot(NamedList response, Snapshot snapshot, boolean ms) { + response.add((ms ? MIN_MS: MIN), nsToMs(ms, snapshot.getMin())); + response.add((ms ? MAX_MS: MAX), nsToMs(ms, snapshot.getMax())); + response.add((ms ? MEAN_MS : MEAN), nsToMs(ms, snapshot.getMean())); + response.add((ms ? MEDIAN_MS: MEDIAN), nsToMs(ms, snapshot.getMedian())); + response.add((ms ? STDDEV_MS: STDDEV), nsToMs(ms, snapshot.getStdDev())); + response.add((ms ? P75_MS: P75), nsToMs(ms, snapshot.get75thPercentile())); + response.add((ms ? P95_MS: P95), nsToMs(ms, snapshot.get95thPercentile())); + response.add((ms ? P99_MS: P99), nsToMs(ms, snapshot.get99thPercentile())); + response.add((ms ? 
P999_MS: P999), nsToMs(ms, snapshot.get999thPercentile())); + } + static NamedList timerToNamedList(Timer timer) { NamedList response = new NamedList(); - addMetrics(response, timer); + response.add("count", timer.getCount()); + response.add("meanRate", timer.getMeanRate()); + response.add("1minRate", timer.getOneMinuteRate()); + response.add("5minRate", timer.getFiveMinuteRate()); + response.add("15minRate", timer.getFifteenMinuteRate()); + // time-based values in nanoseconds + addSnapshot(response, timer.getSnapshot(), true); return response; } static NamedList meterToNamedList(Meter meter) { NamedList response = new NamedList(); - response.add("requests", meter.getCount()); - response.add("avgRequestsPerSecond", meter.getMeanRate()); - response.add("1minRateRequestsPerSecond", meter.getOneMinuteRate()); - response.add("5minRateRequestsPerSecond", meter.getFiveMinuteRate()); - response.add("15minRateRequestsPerSecond", meter.getFifteenMinuteRate()); + response.add("count", meter.getCount()); + response.add("meanRate", meter.getMeanRate()); + response.add("1minRate", meter.getOneMinuteRate()); + response.add("5minRate", meter.getFiveMinuteRate()); + response.add("15minRate", meter.getFifteenMinuteRate()); return response; } @@ -140,7 +183,7 @@ public class MetricUtils { static NamedList counterToNamedList(Counter counter) { NamedList response = new NamedList(); - response.add("requests", counter.getCount()); + response.add("count", counter.getCount()); return response; } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-indexmetrics.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-indexmetrics.xml new file mode 100644 index 00000000000..1acf18dd0b5 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-indexmetrics.xml @@ -0,0 +1,57 @@ + + + + + + + ${solr.data.dir:} + + + + + ${tests.luceneMatchVersion:LATEST} + + + + ${solr.tests.directoryDetails:false} + ${solr.tests.mergeDetails:false} + + + 3 + 100 + + + + + + + ${solr.commitwithin.softcommit:true} + + + + + + explicit + true + text + + + + diff --git a/solr/core/src/test/org/apache/solr/core/TestConfig.java b/solr/core/src/test/org/apache/solr/core/TestConfig.java index 55e1e176576..8244b32d94a 100644 --- a/solr/core/src/test/org/apache/solr/core/TestConfig.java +++ b/solr/core/src/test/org/apache/solr/core/TestConfig.java @@ -128,6 +128,8 @@ public class TestConfig extends SolrTestCaseJ4 { ++numDefaultsTested; assertEquals("default infoStream", InfoStream.NO_OUTPUT, sic.infoStream); + ++numDefaultsTested; assertNotNull("default metrics", sic.metricsInfo); + // mergePolicyInfo and mergePolicyFactoryInfo are mutually exclusive // so ++ count them only once for both instead of individually ++numDefaultsTested; ++numNullDefaults; diff --git a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java index 685ef9987f4..345b86db81f 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java +++ b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java @@ -66,6 +66,8 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CachingDirectoryFactory; import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.DirectoryFactory; +import org.apache.solr.core.MetricsDirectoryFactory; import org.apache.solr.core.SolrCore; import org.apache.solr.core.StandardDirectoryFactory; import 
org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager; @@ -895,12 +897,21 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { } } + private CachingDirectoryFactory getCachingDirectoryFactory(SolrCore core) { + DirectoryFactory df = core.getDirectoryFactory(); + if (df instanceof MetricsDirectoryFactory) { + return (CachingDirectoryFactory)((MetricsDirectoryFactory)df).getDelegate(); + } else { + return (CachingDirectoryFactory)df; + } + } + private void checkForSingleIndex(JettySolrRunner jetty) { CoreContainer cores = jetty.getCoreContainer(); Collection theCores = cores.getCores(); for (SolrCore core : theCores) { String ddir = core.getDataDir(); - CachingDirectoryFactory dirFactory = (CachingDirectoryFactory) core.getDirectoryFactory(); + CachingDirectoryFactory dirFactory = getCachingDirectoryFactory(core); synchronized (dirFactory) { Set livePaths = dirFactory.getLivePaths(); // one for data, one for hte index under data and one for the snapshot metadata. diff --git a/solr/core/src/test/org/apache/solr/handler/admin/CoreMergeIndexesAdminHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/CoreMergeIndexesAdminHandlerTest.java index 6f1a8029e86..937cc860ae9 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/CoreMergeIndexesAdminHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/CoreMergeIndexesAdminHandlerTest.java @@ -24,6 +24,8 @@ import org.apache.lucene.store.LockFactory; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.DirectoryFactory; +import org.apache.solr.core.MetricsDirectoryFactory; import org.apache.solr.core.MockFSDirectoryFactory; import org.apache.solr.core.SolrCore; import org.apache.solr.response.SolrQueryResponse; @@ -75,7 +77,13 @@ public class CoreMergeIndexesAdminHandlerTest extends SolrTestCaseJ4 { final CoreAdminHandler admin = new CoreAdminHandler(cores); try (SolrCore core = cores.getCore("collection1")) { - FailingDirectoryFactory dirFactory = (FailingDirectoryFactory)core.getDirectoryFactory(); + DirectoryFactory df = core.getDirectoryFactory(); + FailingDirectoryFactory dirFactory; + if (df instanceof MetricsDirectoryFactory) { + dirFactory = (FailingDirectoryFactory)((MetricsDirectoryFactory)df).getDelegate(); + } else { + dirFactory = (FailingDirectoryFactory)df; + } try { dirFactory.fail = true; diff --git a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java index 3667285ad0d..e15778d5c0c 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java @@ -49,9 +49,9 @@ public class MetricsHandlerTest extends SolrTestCaseJ4 { assertNotNull(values.get("solr.node")); NamedList nl = (NamedList) values.get("solr.core.collection1"); assertNotNull(nl); - assertNotNull(nl.get("newSearcherErrors")); // counter type - assertNotNull(((NamedList) nl.get("newSearcherErrors")).get("requests")); - assertEquals(0L, ((NamedList) nl.get("newSearcherErrors")).get("requests")); + assertNotNull(nl.get("SEARCHER.new.errors")); // counter type + assertNotNull(((NamedList) nl.get("SEARCHER.new.errors")).get("count")); + assertEquals(0L, ((NamedList) nl.get("SEARCHER.new.errors")).get("count")); nl = (NamedList) values.get("solr.node"); assertNotNull(nl.get("cores.loaded")); // int gauge assertEquals(1, 
((NamedList) nl.get("cores.loaded")).get("value")); diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java index 4fec5c3896f..7d1c4c7f982 100644 --- a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java +++ b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java @@ -197,6 +197,7 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 { } else { assertNull(m.get("mergedSegmentWarmer")); } + ++mSizeExpected; assertNotNull(m.get("metrics")); assertEquals(mSizeExpected, m.size()); } diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java new file mode 100644 index 00000000000..e17b1bd2ad7 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update; + +import java.util.Map; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.Metric; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test proper registration and collection of index and directory metrics. 
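The MetricUtils changes above flatten a Dropwizard Timer into back-compat keys: raw counts and rates keep plain names ("count", "1minRate"), while time-based snapshot values gain an "_ms" suffix after nanosecond-to-millisecond conversion. A sketch of that flattening with a plain LinkedHashMap standing in for Solr's NamedList (the key subset shown is partial):

```java
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import com.codahale.metrics.Snapshot;
import com.codahale.metrics.Timer;

/** Sketch of the timer flattening performed by MetricUtils.timerToNamedList, using a plain Map. */
public class TimerToMapSketch {
  static double nsToMs(double ns) {
    return ns / TimeUnit.MILLISECONDS.toNanos(1);
  }

  static Map<String, Object> toMap(Timer timer) {
    Map<String, Object> out = new LinkedHashMap<>();
    out.put("count", timer.getCount());
    out.put("meanRate", timer.getMeanRate());
    out.put("1minRate", timer.getOneMinuteRate());
    Snapshot s = timer.getSnapshot();
    // time-based snapshot values are reported in milliseconds under "_ms" keys
    out.put("min_ms", nsToMs(s.getMin()));
    out.put("max_ms", nsToMs(s.getMax()));
    out.put("mean_ms", nsToMs(s.getMean()));
    out.put("p99_ms", nsToMs(s.get99thPercentile()));
    return out;
  }

  public static void main(String[] args) {
    Timer timer = new Timer();
    timer.update(1, TimeUnit.MILLISECONDS);
    System.out.println(toMap(timer));
  }
}
```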
+ */ +public class SolrIndexMetricsTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + System.setProperty("solr.tests.mergeDetails", "true"); + System.setProperty("solr.tests.directoryDetails", "true"); + initCore("solrconfig-indexmetrics.xml", "schema.xml"); + } + + @Test + public void testIndexMetrics() throws Exception { + SolrQueryRequest req = lrf.makeRequest(); + UpdateHandler uh = req.getCore().getUpdateHandler(); + AddUpdateCommand add = new AddUpdateCommand(req); + for (int i = 0; i < 1000; i++) { + add.clear(); + add.solrDoc = new SolrInputDocument(); + add.solrDoc.addField("id", "" + i); + add.solrDoc.addField("foo_s", "foo-" + i); + uh.addDoc(add); + } + uh.commit(new CommitUpdateCommand(req, false)); + MetricRegistry registry = h.getCoreContainer().getMetricManager().registry(h.getCore().getCoreMetricManager().getRegistryName()); + assertNotNull(registry); + // make sure all merges are finished + h.reload(); + + Map metrics = registry.getMetrics(); + + assertTrue(metrics.entrySet().stream().filter(e -> e.getKey().startsWith("INDEX")).count() >= 12); + // this is variable, depending on the codec and the number of created files + assertTrue(metrics.entrySet().stream().filter(e -> e.getKey().startsWith("DIRECTORY")).count() > 50); + + // check basic index meters + Timer timer = (Timer)metrics.get("INDEX.merge.minor"); + assertEquals("minorMerge: " + timer.getCount(), 4, timer.getCount()); + timer = (Timer)metrics.get("INDEX.merge.major"); + assertEquals("majorMerge: " + timer.getCount(), 0, timer.getCount()); + Meter meter = (Meter)metrics.get("INDEX.merge.major.docs"); + assertEquals("majorMergeDocs: " + meter.getCount(), 0, meter.getCount()); + meter = (Meter)metrics.get("INDEX.flush"); + assertEquals("flush: " + meter.getCount(), 19, meter.getCount()); + + // check basic directory meters + meter = (Meter)metrics.get("DIRECTORY.total.reads"); + assertTrue("totalReads", meter.getCount() > 0); + meter = (Meter)metrics.get("DIRECTORY.total.writes"); + assertTrue("totalWrites", meter.getCount() > 0); + Histogram histogram = (Histogram)metrics.get("DIRECTORY.total.readSizes"); + assertTrue("readSizes", histogram.getCount() > 0); + histogram = (Histogram)metrics.get("DIRECTORY.total.writeSizes"); + assertTrue("writeSizes", histogram.getCount() > 0); + // check detailed meters + meter = (Meter)metrics.get("DIRECTORY.segments.writes"); + assertTrue("segmentsWrites", meter.getCount() > 0); + histogram = (Histogram)metrics.get("DIRECTORY.segments.writeSizes"); + assertTrue("segmentsWriteSizes", histogram.getCount() > 0); + + } +} diff --git a/solr/core/src/test/org/apache/solr/util/stats/MetricUtilsTest.java b/solr/core/src/test/org/apache/solr/util/stats/MetricUtilsTest.java index 31e8154c70e..e39ad6e6040 100644 --- a/solr/core/src/test/org/apache/solr/util/stats/MetricUtilsTest.java +++ b/solr/core/src/test/org/apache/solr/util/stats/MetricUtilsTest.java @@ -23,7 +23,6 @@ import com.codahale.metrics.Snapshot; import com.codahale.metrics.Timer; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; import org.junit.Test; public class MetricUtilsTest extends SolrTestCaseJ4 { @@ -34,24 +33,23 @@ public class MetricUtilsTest extends SolrTestCaseJ4 { final Timer timer = new Timer(); final int iterations = random().nextInt(100); for (int i = 0; i < iterations; ++i) { - timer.update(random().nextInt(), TimeUnit.NANOSECONDS); + 
timer.update(Math.abs(random().nextInt()) + 1, TimeUnit.NANOSECONDS); } // obtain timer metrics - final NamedList lst = new SimpleOrderedMap<>(); - MetricUtils.addMetrics(lst, timer); + NamedList lst = MetricUtils.timerToNamedList(timer); // check that expected metrics were obtained - assertEquals(lst.size(), 9); + assertEquals(14, lst.size()); final Snapshot snapshot = timer.getSnapshot(); // cannot test avgRequestsPerMinute directly because mean rate changes as time increases! // assertEquals(lst.get("avgRequestsPerSecond"), timer.getMeanRate()); - assertEquals(lst.get("5minRateRequestsPerSecond"), timer.getFiveMinuteRate()); - assertEquals(lst.get("15minRateRequestsPerSecond"), timer.getFifteenMinuteRate()); - assertEquals(lst.get("avgTimePerRequest"), MetricUtils.nsToMs(snapshot.getMean())); - assertEquals(lst.get("medianRequestTime"), MetricUtils.nsToMs(snapshot.getMedian())); - assertEquals(lst.get("75thPcRequestTime"), MetricUtils.nsToMs(snapshot.get75thPercentile())); - assertEquals(lst.get("95thPcRequestTime"), MetricUtils.nsToMs(snapshot.get95thPercentile())); - assertEquals(lst.get("99thPcRequestTime"), MetricUtils.nsToMs(snapshot.get99thPercentile())); - assertEquals(lst.get("999thPcRequestTime"), MetricUtils.nsToMs(snapshot.get999thPercentile())); + assertEquals(timer.getFiveMinuteRate(), lst.get("5minRate")); + assertEquals(timer.getFifteenMinuteRate(), lst.get("15minRate")); + assertEquals(MetricUtils.nsToMs(snapshot.getMean()), lst.get("mean_ms")); + assertEquals(MetricUtils.nsToMs(snapshot.getMedian()), lst.get("median_ms")); + assertEquals(MetricUtils.nsToMs(snapshot.get75thPercentile()), lst.get("p75_ms")); + assertEquals(MetricUtils.nsToMs(snapshot.get95thPercentile()), lst.get("p95_ms")); + assertEquals(MetricUtils.nsToMs(snapshot.get99thPercentile()), lst.get("p99_ms")); + assertEquals(MetricUtils.nsToMs(snapshot.get999thPercentile()), lst.get("p999_ms")); } } From b4a002f7d88a2383852e2bfd95b39bf7f6e33f2f Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 3 Jan 2017 06:26:49 -0500 Subject: [PATCH 81/83] LUCENE-7588: DrillSideways can now run its queries concurrently --- lucene/CHANGES.txt | 5 + .../lucene/search/MultiCollectorManager.java | 105 ++++++ .../apache/lucene/facet/DrillSideways.java | 338 ++++++++++++++---- .../lucene/facet/FacetsCollectorManager.java | 55 +++ .../lucene/facet/TestDrillSideways.java | 309 +++++++++------- .../facet/TestParallelDrillSideways.java | 90 +++++ 6 files changed, 704 insertions(+), 198 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/FacetsCollectorManager.java create mode 100644 lucene/facet/src/test/org/apache/lucene/facet/TestParallelDrillSideways.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 474080cf275..5b7a10c12f0 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -98,6 +98,11 @@ New features parsers, by enumerating all paths and creating the corresponding query/ies as sub-clauses (Matt Weber via Mike McCandless) +* LUCENE-7588: DrillSideways can now run queries concurrently, and + supports an IndexSearcher using an executor service to run each query + concurrently across all segments in the index (Emmanuel Keller via + Mike McCandless) + Bug Fixes * LUCENE-7547: JapaneseTokenizerFactory was failing to close the diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java 
new file mode 100644 index 00000000000..9549cde06e5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import org.apache.lucene.index.LeafReaderContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * A {@link CollectorManager} implements which wrap a set of {@link CollectorManager} + * as {@link MultiCollector} acts for {@link Collector}. + */ +public class MultiCollectorManager implements CollectorManager { + + final private CollectorManager[] collectorManagers; + + public MultiCollectorManager(final CollectorManager... collectorManagers) { + this.collectorManagers = collectorManagers; + } + + @Override + public Collectors newCollector() throws IOException { + return new Collectors(); + } + + @Override + public Object[] reduce(Collection reducableCollectors) throws IOException { + final int size = reducableCollectors.size(); + final Object[] results = new Object[collectorManagers.length]; + for (int i = 0; i < collectorManagers.length; i++) { + final List reducableCollector = new ArrayList<>(size); + for (Collectors collectors : reducableCollectors) + reducableCollector.add(collectors.collectors[i]); + results[i] = collectorManagers[i].reduce(reducableCollector); + } + return results; + } + + public class Collectors implements Collector { + + private final Collector[] collectors; + + private Collectors() throws IOException { + collectors = new Collector[collectorManagers.length]; + for (int i = 0; i < collectors.length; i++) + collectors[i] = collectorManagers[i].newCollector(); + } + + @Override + final public LeafCollector getLeafCollector(final LeafReaderContext context) throws IOException { + return new LeafCollectors(context); + } + + @Override + final public boolean needsScores() { + for (Collector collector : collectors) + if (collector.needsScores()) + return true; + return false; + } + + public class LeafCollectors implements LeafCollector { + + private final LeafCollector[] leafCollectors; + + private LeafCollectors(final LeafReaderContext context) throws IOException { + leafCollectors = new LeafCollector[collectors.length]; + for (int i = 0; i < collectors.length; i++) + leafCollectors[i] = collectors[i].getLeafCollector(context); + } + + @Override + final public void setScorer(final Scorer scorer) throws IOException { + for (LeafCollector leafCollector : leafCollectors) + if (leafCollector != null) + leafCollector.setScorer(scorer); + } + + @Override + final public void collect(final int doc) throws IOException { + for (LeafCollector leafCollector : leafCollectors) + if (leafCollector != null) + 
leafCollector.collect(doc); + } + } + } + +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java index 57f0a32742b..61530bcf2d3 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java @@ -16,39 +16,47 @@ */ package org.apache.lucene.facet; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts; import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.search.FilterCollector; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.FilterCollector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; +import org.apache.lucene.search.MultiCollectorManager; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.util.ThreadInterruptedException; -/** +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +/** * Computes drill down and sideways counts for the provided * {@link DrillDownQuery}. Drill sideways counts include * alternative values/aggregates for the drill-down * dimensions so that a dimension does not disappear after * the user drills down into it. - * *

    Use one of the static search * methods to do the search, and then get the hits and facet * results from the returned {@link DrillSidewaysResult}. - * *

    NOTE: this allocates one {@link * FacetsCollector} for each drill-down, plus one. If your * index has high number of facet labels then this will @@ -58,62 +66,96 @@ import org.apache.lucene.search.TopScoreDocCollector; */ public class DrillSideways { - /** {@link IndexSearcher} passed to constructor. */ + /** + * {@link IndexSearcher} passed to constructor. + */ protected final IndexSearcher searcher; - /** {@link TaxonomyReader} passed to constructor. */ + /** + * {@link TaxonomyReader} passed to constructor. + */ protected final TaxonomyReader taxoReader; - /** {@link SortedSetDocValuesReaderState} passed to - * constructor; can be null. */ + /** + * {@link SortedSetDocValuesReaderState} passed to + * constructor; can be null. + */ protected final SortedSetDocValuesReaderState state; - /** {@link FacetsConfig} passed to constructor. */ + /** + * {@link FacetsConfig} passed to constructor. + */ protected final FacetsConfig config; - /** Create a new {@code DrillSideways} instance. */ + // These are only used for multi-threaded search + private final ExecutorService executor; + + /** + * Create a new {@code DrillSideways} instance. + */ public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) { this(searcher, config, taxoReader, null); } - - /** Create a new {@code DrillSideways} instance, assuming the categories were - * indexed with {@link SortedSetDocValuesFacetField}. */ + + /** + * Create a new {@code DrillSideways} instance, assuming the categories were + * indexed with {@link SortedSetDocValuesFacetField}. + */ public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) { this(searcher, config, null, state); } - /** Create a new {@code DrillSideways} instance, where some - * dimensions were indexed with {@link - * SortedSetDocValuesFacetField} and others were indexed - * with {@link FacetField}. */ - public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state) { + /** + * Create a new {@code DrillSideways} instance, where some + * dimensions were indexed with {@link + * SortedSetDocValuesFacetField} and others were indexed + * with {@link FacetField}. + */ + public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, + SortedSetDocValuesReaderState state) { + this(searcher, config, taxoReader, state, null); + } + + /** + * Create a new {@code DrillSideways} instance, where some + * dimensions were indexed with {@link + * SortedSetDocValuesFacetField} and others were indexed + * with {@link FacetField}. + *

    + * Use this constructor to use the concurrent implementation and/or the CollectorManager + */ + public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, + SortedSetDocValuesReaderState state, ExecutorService executor) { this.searcher = searcher; this.config = config; this.taxoReader = taxoReader; this.state = state; + this.executor = executor; } - /** Subclass can override to customize per-dim Facets - * impl. */ - protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { + /** + * Subclass can override to customize per-dim Facets + * impl. + */ + protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, + String[] drillSidewaysDims) throws IOException { Facets drillDownFacets; - Map drillSidewaysFacets = new HashMap<>(); + Map drillSidewaysFacets = new HashMap<>(); if (taxoReader != null) { drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillDowns); if (drillSideways != null) { - for(int i=0;i drillDownDims = query.getDims(); + Map drillDownDims = query.getDims(); FacetsCollector drillDownCollector = new FacetsCollector(); - + if (drillDownDims.isEmpty()) { // There are no drill-down dims, so there is no // drill-sideways to compute: @@ -154,8 +196,10 @@ public class DrillSideways { for (int i = 0; i < drillSidewaysCollectors.length; i++) { drillSidewaysCollectors[i] = new FacetsCollector(); } - - DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce()); + + DrillSidewaysQuery dsq = + new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, + scoreSubDocsAtOnce()); if (hitCollector.needsScores() == false) { // this is a horrible hack in order to make sure IndexSearcher will not // attempt to cache the DrillSidewaysQuery @@ -168,16 +212,16 @@ public class DrillSideways { } searcher.search(dsq, hitCollector); - return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null); + return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, + drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null); } /** * Search, sorting by {@link Sort}, and computing * drill down and sideways counts. 
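A sketch of how a caller might opt into the concurrent code path introduced by this constructor. The thread-pool size and lifecycle are assumptions left to the caller, and the helper name is hypothetical; passing a null SortedSetDocValuesReaderState assumes all dimensions are taxonomy-indexed.

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.IndexSearcher;

/** Sketch: enabling concurrent drill-sideways by passing an executor to the new constructor. */
public class ConcurrentDrillSidewaysSketch {

  static DrillSidewaysResult searchConcurrently(IndexSearcher searcher, FacetsConfig config,
      TaxonomyReader taxoReader, DrillDownQuery query) throws Exception {
    ExecutorService executor = Executors.newFixedThreadPool(4); // sized arbitrarily for the sketch
    try {
      DrillSideways ds = new DrillSideways(searcher, config, taxoReader, null, executor);
      // with a non-null executor, one query per drill-down dimension runs in parallel
      return ds.search(null, query, 10);
    } finally {
      executor.shutdown();
    }
  }
}
```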
*/ - public DrillSidewaysResult search(DrillDownQuery query, - Query filter, FieldDoc after, int topN, Sort sort, boolean doDocScores, - boolean doMaxScore) throws IOException { + public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort, + boolean doDocScores, boolean doMaxScore) throws IOException { if (filter != null) { query = new DrillDownQuery(config, filter, query); } @@ -186,15 +230,38 @@ public class DrillSideways { if (limit == 0) { limit = 1; // the collector does not alow numHits = 0 } - topN = Math.min(topN, limit); - final TopFieldCollector hitCollector = TopFieldCollector.create(sort, - topN, - after, - true, - doDocScores, - doMaxScore); - DrillSidewaysResult r = search(query, hitCollector); - return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); + final int fTopN = Math.min(topN, limit); + + if (executor != null) { // We have an executor, let use the multi-threaded version + + final CollectorManager collectorManager = + new CollectorManager() { + + @Override + public TopFieldCollector newCollector() throws IOException { + return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore); + } + + @Override + public TopDocs reduce(Collection collectors) throws IOException { + final TopDocs[] topDocs = new TopDocs[collectors.size()]; + int pos = 0; + for (TopFieldCollector collector : collectors) + topDocs[pos++] = collector.topDocs(); + return TopDocs.merge(topN, topDocs); + } + + }; + ConcurrentDrillSidewaysResult r = search(query, collectorManager); + return new DrillSidewaysResult(r.facets, r.collectorResult); + + } else { + + final TopFieldCollector hitCollector = + TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore); + DrillSidewaysResult r = search(query, hitCollector); + return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); + } } else { return search(after, query, topN); } @@ -212,41 +279,184 @@ public class DrillSideways { * Search, sorting by score, and computing * drill down and sideways counts. 
*/ - public DrillSidewaysResult search(ScoreDoc after, - DrillDownQuery query, int topN) throws IOException { + public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException { int limit = searcher.getIndexReader().maxDoc(); if (limit == 0) { limit = 1; // the collector does not alow numHits = 0 } - topN = Math.min(topN, limit); - TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after); - DrillSidewaysResult r = search(query, hitCollector); - return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); + final int fTopN = Math.min(topN, limit); + + if (executor != null) { // We have an executor, let use the multi-threaded version + + final CollectorManager collectorManager = + new CollectorManager() { + + @Override + public TopScoreDocCollector newCollector() throws IOException { + return TopScoreDocCollector.create(fTopN, after); + } + + @Override + public TopDocs reduce(Collection collectors) throws IOException { + final TopDocs[] topDocs = new TopDocs[collectors.size()]; + int pos = 0; + for (TopScoreDocCollector collector : collectors) + topDocs[pos++] = collector.topDocs(); + return TopDocs.merge(topN, topDocs); + } + + }; + ConcurrentDrillSidewaysResult r = search(query, collectorManager); + return new DrillSidewaysResult(r.facets, r.collectorResult); + + } else { + + TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after); + DrillSidewaysResult r = search(query, hitCollector); + return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); + } } - /** Override this and return true if your collector - * (e.g., {@code ToParentBlockJoinCollector}) expects all - * sub-scorers to be positioned on the document being - * collected. This will cause some performance loss; - * default is false. */ + /** + * Override this and return true if your collector + * (e.g., {@code ToParentBlockJoinCollector}) expects all + * sub-scorers to be positioned on the document being + * collected. This will cause some performance loss; + * default is false. + */ protected boolean scoreSubDocsAtOnce() { return false; } - /** Result of a drill sideways search, including the - * {@link Facets} and {@link TopDocs}. */ + /** + * Result of a drill sideways search, including the + * {@link Facets} and {@link TopDocs}. + */ public static class DrillSidewaysResult { - /** Combined drill down and sideways results. */ + /** + * Combined drill down and sideways results. + */ public final Facets facets; - /** Hits. */ + /** + * Hits. + */ public final TopDocs hits; - /** Sole constructor. */ + /** + * Sole constructor. 
+ */ public DrillSidewaysResult(Facets facets, TopDocs hits) { this.facets = facets; this.hits = hits; } } + + private static class CallableCollector implements Callable { + + private final int pos; + private final IndexSearcher searcher; + private final Query query; + private final CollectorManager collectorManager; + + private CallableCollector(int pos, IndexSearcher searcher, Query query, CollectorManager collectorManager) { + this.pos = pos; + this.searcher = searcher; + this.query = query; + this.collectorManager = collectorManager; + } + + @Override + public CallableResult call() throws Exception { + return new CallableResult(pos, searcher.search(query, collectorManager)); + } + } + + private static class CallableResult { + + private final int pos; + private final Object result; + + private CallableResult(int pos, Object result) { + this.pos = pos; + this.result = result; + } + } + + private DrillDownQuery getDrillDownQuery(final DrillDownQuery query, Query[] queries, + final String excludedDimension) { + final DrillDownQuery ddl = new DrillDownQuery(config, query.getBaseQuery()); + query.getDims().forEach((dim, pos) -> { + if (!dim.equals(excludedDimension)) + ddl.add(dim, queries[pos]); + }); + return ddl.getDims().size() == queries.length ? null : ddl; + } + + /** Runs a search, using a {@link CollectorManager} to gather and merge search results */ + public ConcurrentDrillSidewaysResult search(final DrillDownQuery query, + final CollectorManager hitCollectorManager) throws IOException { + + final Map drillDownDims = query.getDims(); + final List callableCollectors = new ArrayList<>(drillDownDims.size() + 1); + + // Add the main DrillDownQuery + callableCollectors.add(new CallableCollector(-1, searcher, query, + new MultiCollectorManager(new FacetsCollectorManager(), hitCollectorManager))); + int i = 0; + final Query[] filters = query.getDrillDownQueries(); + for (String dim : drillDownDims.keySet()) + callableCollectors.add(new CallableCollector(i++, searcher, getDrillDownQuery(query, filters, dim), + new FacetsCollectorManager())); + + final FacetsCollector mainFacetsCollector; + final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()]; + final R collectorResult; + + try { + // Run the query pool + final List> futures = executor.invokeAll(callableCollectors); + + // Extract the results + final Object[] mainResults = (Object[]) futures.get(0).get().result; + mainFacetsCollector = (FacetsCollector) mainResults[0]; + collectorResult = (R) mainResults[1]; + for (i = 1; i < futures.size(); i++) { + final CallableResult result = futures.get(i).get(); + facetsCollectors[result.pos] = (FacetsCollector) result.result; + } + // Fill the null results with the mainFacetsCollector + for (i = 0; i < facetsCollectors.length; i++) + if (facetsCollectors[i] == null) + facetsCollectors[i] = mainFacetsCollector; + + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + + // build the facets and return the result + return new ConcurrentDrillSidewaysResult<>(buildFacetsResult(mainFacetsCollector, facetsCollectors, + drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null, collectorResult); + } + + /** + * Result of a concurrent drill sideways search, including the + * {@link Facets} and {@link TopDocs}. 
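The concurrent search above collects the main query once for both facets and hits by wrapping two managers in a MultiCollectorManager and indexing into the reduced Object[]. A sketch of that pattern in isolation; since the FacetsCollectorManager added here is package-private, the facets manager is assumed to be supplied by the caller, and the class name is hypothetical.

```java
import java.io.IOException;

import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;

/** Sketch: one search call feeding a facets manager and a hits manager via MultiCollectorManager. */
public class MultiCollectorManagerSketch {

  static void collectBoth(IndexSearcher searcher, Query query,
      CollectorManager<?, FacetsCollector> facetsManager,
      CollectorManager<TopScoreDocCollector, TopDocs> hitsManager) throws IOException {
    // reduce() returns one result per wrapped manager, in constructor order
    Object[] reduced = (Object[]) searcher.search(query,
        new MultiCollectorManager(facetsManager, hitsManager));
    FacetsCollector facets = (FacetsCollector) reduced[0];
    TopDocs hits = (TopDocs) reduced[1];
    System.out.println(hits.totalHits + " hits, "
        + facets.getMatchingDocs().size() + " matching-docs slices");
  }
}
```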
+ */ + public static class ConcurrentDrillSidewaysResult extends DrillSidewaysResult { + + /** The merged search results */ + public final R collectorResult; + + /** + * Sole constructor. + */ + ConcurrentDrillSidewaysResult(Facets facets, TopDocs hits, R collectorResult) { + super(facets, hits); + this.collectorResult = collectorResult; + } + } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollectorManager.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollectorManager.java new file mode 100644 index 00000000000..652436d5211 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollectorManager.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet; + +import org.apache.lucene.search.CollectorManager; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +/** + * A {@link CollectorManager} implementation which produce FacetsCollector and product a merged FacetsCollector. + * This is used for concurrent FacetsCollection. 
+ */ +class FacetsCollectorManager implements CollectorManager { + + public final static FacetsCollector EMPTY = new FacetsCollector(); + + @Override + public FacetsCollector newCollector() throws IOException { + return new FacetsCollector(); + } + + @Override + public FacetsCollector reduce(Collection collectors) throws IOException { + if (collectors == null || collectors.size() == 0) + return EMPTY; + if (collectors.size() == 1) + return collectors.iterator().next(); + return new ReducedFacetsCollector(collectors); + } + + private static class ReducedFacetsCollector extends FacetsCollector { + + public ReducedFacetsCollector(final Collection facetsCollectors) { + final List matchingDocs = this.getMatchingDocs(); + facetsCollectors.forEach(facetsCollector -> matchingDocs.addAll(facetsCollector.getMatchingDocs())); + } + } + +} diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index 39609562d21..ff3e2fc302f 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -16,16 +16,6 @@ */ package org.apache.lucene.facet; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -68,8 +58,61 @@ import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + public class TestDrillSideways extends FacetTestCase { + protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config, + SortedSetDocValuesReaderState state) { + return new DrillSideways(searcher, config, state); + } + + protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) { + return new DrillSideways(searcher, config, taxoReader); + } + + protected DrillSideways getNewDrillSidewaysScoreSubdocsAtOnce(IndexSearcher searcher, FacetsConfig config, + TaxonomyReader taxoReader) { + return new DrillSideways(searcher, config, taxoReader) { + @Override + protected boolean scoreSubDocsAtOnce() { + return true; + } + }; + } + + protected DrillSideways getNewDrillSidewaysBuildFacetsResult(IndexSearcher searcher, FacetsConfig config, + TaxonomyReader taxoReader) { + return new DrillSideways(searcher, config, taxoReader) { + @Override + protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, + String[] drillSidewaysDims) throws IOException { + Map drillSidewaysFacets = new HashMap<>(); + Facets drillDownFacets = getTaxonomyFacetCounts(taxoReader, config, drillDowns); + if (drillSideways != null) { + for (int i = 0; i < drillSideways.length; i++) { + drillSidewaysFacets.put(drillSidewaysDims[i], getTaxonomyFacetCounts(taxoReader, config, drillSideways[i])); + } + } + + if (drillSidewaysFacets.isEmpty()) { + return drillDownFacets; + } else { + return new MultiFacets(drillSidewaysFacets, drillDownFacets); + } + + 
} + }; + } + public void testBasic() throws Exception { Directory dir = newDirectory(); Directory taxoDir = newDirectory(); @@ -116,7 +159,7 @@ public class TestDrillSideways extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - DrillSideways ds = new DrillSideways(searcher, config, taxoReader); + DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader); // case: drill-down on a single field; in this // case the drill-sideways + drill-down counts == @@ -127,12 +170,14 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(2, r.hits.totalHits); // Publish Date is only drill-down, and Lisa published // one in 2012 and one in 2010: - assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: Lisa // (drill-down) published twice, and Frank/Susan/Bob // published once: - assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", + r.facets.getTopChildren(10, "Author").toString()); // Same simple case, but no baseQuery (pure browse): // drill-down on a single field; in this case the @@ -145,12 +190,14 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(2, r.hits.totalHits); // Publish Date is only drill-down, and Lisa published // one in 2012 and one in 2010: - assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: Lisa // (drill-down) published twice, and Frank/Susan/Bob // published once: - assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", + r.facets.getTopChildren(10, "Author").toString()); // Another simple case: drill-down on single fields // but OR of two values @@ -161,17 +208,21 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(3, r.hits.totalHits); // Publish Date is only drill-down: Lisa and Bob // (drill-down) published twice in 2010 and once in 2012: - assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: Lisa // (drill-down) published twice, and Frank/Susan/Bob // published once: - assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", + r.facets.getTopChildren(10, "Author").toString()); assertTrue(r.facets instanceof MultiFacets); List allResults = 
r.facets.getAllDims(10); assertEquals(2, allResults.size()); - assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", allResults.get(0).toString()); - assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", allResults.get(1).toString()); + assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", + allResults.get(0).toString()); + assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", + allResults.get(1).toString()); // More interesting case: drill-down on two fields ddq = new DrillDownQuery(config); @@ -181,10 +232,12 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(1, r.hits.totalHits); // Publish Date is drill-sideways + drill-down: Lisa // (drill-down) published once in 2010 and once in 2012: - assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: // only Lisa & Bob published (once each) in 2010: - assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", + r.facets.getTopChildren(10, "Author").toString()); // Even more interesting case: drill down on two fields, // but one of them is OR @@ -198,10 +251,12 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(2, r.hits.totalHits); // Publish Date is both drill-sideways + drill-down: // Lisa or Bob published twice in 2010 and once in 2012: - assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: // only Lisa & Bob published (once each) in 2010: - assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", + r.facets.getTopChildren(10, "Author").toString()); // Test drilling down on invalid field: ddq = new DrillDownQuery(config); @@ -219,11 +274,13 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(2, r.hits.totalHits); // Publish Date is only drill-down, and Lisa published // one in 2012 and one in 2010: - assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: Lisa // (drill-down) published twice, and Frank/Susan/Bob // published once: - assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", + r.facets.getTopChildren(10, "Author").toString()); // LUCENE-4915: test drilling down on a dimension but // NOT facet 
counting it: @@ -234,7 +291,8 @@ public class TestDrillSideways extends FacetTestCase { assertEquals(2, r.hits.totalHits); // Publish Date is only drill-down, and Lisa published // one in 2012 and one in 2010: - assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Test main query gets null scorer: ddq = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz"))); @@ -288,16 +346,18 @@ public class TestDrillSideways extends FacetTestCase { DrillDownQuery ddq = new DrillDownQuery(config); ddq.add("Author", "Lisa"); - DrillSidewaysResult r = new DrillSideways(searcher, config, taxoReader).search(null, ddq, 10); + DrillSidewaysResult r = getNewDrillSideways(searcher, config, taxoReader).search(null, ddq, 10); assertEquals(1, r.hits.totalHits); // Publish Date is only drill-down, and Lisa published // one in 2012 and one in 2010: - assertEquals("dim=Publish Date path=[] value=1 childCount=1\n 2010 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + assertEquals("dim=Publish Date path=[] value=1 childCount=1\n 2010 (1)\n", + r.facets.getTopChildren(10, "Publish Date").toString()); // Author is drill-sideways + drill-down: Lisa // (drill-down) published once, and Bob // published once: - assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString()); + assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", + r.facets.getTopChildren(10, "Author").toString()); writer.close(); IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir); @@ -349,11 +409,13 @@ public class TestDrillSideways extends FacetTestCase { DrillDownQuery ddq = new DrillDownQuery(config); ddq.add("dim", "a"); - DrillSidewaysResult r = new DrillSideways(searcher, config, taxoReader).search(null, ddq, 10); + DrillSidewaysResult r = getNewDrillSideways(searcher, config, taxoReader).search(null, ddq, 10); assertEquals(3, r.hits.totalHits); - assertEquals("dim=dim path=[] value=6 childCount=4\n a (3)\n b (1)\n c (1)\n d (1)\n", r.facets.getTopChildren(10, "dim").toString()); - assertEquals("dim=dim path=[a] value=3 childCount=3\n x (1)\n y (1)\n z (1)\n", r.facets.getTopChildren(10, "dim", "a").toString()); + assertEquals("dim=dim path=[] value=6 childCount=4\n a (3)\n b (1)\n c (1)\n d (1)\n", + r.facets.getTopChildren(10, "dim").toString()); + assertEquals("dim=dim path=[a] value=3 childCount=3\n x (1)\n y (1)\n z (1)\n", + r.facets.getTopChildren(10, "dim", "a").toString()); writer.close(); IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir); @@ -363,7 +425,8 @@ public class TestDrillSideways extends FacetTestCase { String id; String contentToken; - public Doc() {} + public Doc() { + } // -1 if the doc is missing this dim, else the index // -into the values for this dim: @@ -427,12 +490,14 @@ public class TestDrillSideways extends FacetTestCase { int numDocs = atLeast(3000); //int numDocs = 20; if (VERBOSE) { - System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance); + System.out.println( + "numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + + cChance); } String[][] dimValues = new String[numDims][]; 
int valueCount = 2; - for(int dim=0;dim values = new HashSet<>(); while (values.size() < valueCount) { String s = TestUtil.randomRealisticUnicodeString(random()); @@ -446,19 +511,19 @@ public class TestDrillSideways extends FacetTestCase { } List docs = new ArrayList<>(); - for(int i=0;i lastDocID; - lastDocID = doc; - } + @Override + public void collect(int doc) { + assert doc > lastDocID; + lastDocID = doc; + } - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - lastDocID = -1; - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + lastDocID = -1; + } - @Override - public boolean needsScores() { - return false; - } - }); + @Override + public boolean needsScores() { + return false; + } + }); // Also separately verify that DS respects the // scoreSubDocsAtOnce method, to ensure that all @@ -728,12 +793,7 @@ public class TestDrillSideways extends FacetTestCase { // drill-down values, because in that case it's // easily possible for one of the DD terms to be on // a future docID: - new DrillSideways(s, config, tr) { - @Override - protected boolean scoreSubDocsAtOnce() { - return true; - } - }.search(ddq, new AssertingSubDocsAtOnceCollector()); + getNewDrillSidewaysScoreSubdocsAtOnce(s, config, tr).search(ddq, new AssertingSubDocsAtOnceCollector()); } TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter); @@ -741,36 +801,17 @@ public class TestDrillSideways extends FacetTestCase { Sort sort = new Sort(new SortField("id", SortField.Type.STRING)); DrillSideways ds; if (doUseDV) { - ds = new DrillSideways(s, config, sortedSetDVState); + ds = getNewDrillSideways(s, config, sortedSetDVState); } else { - ds = new DrillSideways(s, config, tr) { - @Override - protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { - Map drillSidewaysFacets = new HashMap<>(); - Facets drillDownFacets = getTaxonomyFacetCounts(taxoReader, config, drillDowns); - if (drillSideways != null) { - for(int i=0;i scores = new HashMap<>(); - for(ScoreDoc sd : hits.scoreDocs) { + Map scores = new HashMap<>(); + for (ScoreDoc sd : hits.scoreDocs) { scores.put(s.doc(sd.doc).get("id"), sd.score); } if (VERBOSE) { @@ -781,14 +822,11 @@ public class TestDrillSideways extends FacetTestCase { // Make sure drill down doesn't change score: Query q = ddq; if (filter != null) { - q = new BooleanQuery.Builder() - .add(q, Occur.MUST) - .add(filter, Occur.FILTER) - .build(); + q = new BooleanQuery.Builder().add(q, Occur.MUST).add(filter, Occur.FILTER).build(); } TopDocs ddqHits = s.search(q, numDocs); assertEquals(expected.hits.size(), ddqHits.totalHits); - for(int i=0;i hits; int[][] counts; int[] uniqueCounts; - public TestFacetResult() {} + + public TestFacetResult() { + } } private int[] getTopNOrds(final int[] counts, final String[] values, int topN) { final int[] ids = new int[counts.length]; - for(int i=0;i docs, - String contentToken, String[][] drillDowns, - String[][] dimValues, Query onlyEven) throws Exception { + private TestFacetResult slowDrillSidewaysSearch(IndexSearcher s, List docs, String contentToken, + String[][] drillDowns, String[][] dimValues, Query onlyEven) throws Exception { int numDims = dimValues.length; List hits = new ArrayList<>(); Counters drillDownCounts = new Counters(dimValues); Counters[] drillSidewaysCounts = new Counters[dimValues.length]; - for(int dim=0;dim idToDocID = new 
HashMap<>(); - for(int i=0;i idToDocID = new HashMap<>(); + for (int i = 0; i < s.getIndexReader().maxDoc(); i++) { idToDocID.put(s.doc(i).get("id"), i); } @@ -982,36 +1022,35 @@ public class TestDrillSideways extends FacetTestCase { return res; } - void verifyEquals(String[][] dimValues, IndexSearcher s, TestFacetResult expected, - DrillSidewaysResult actual, Map scores, boolean isSortedSetDV) throws Exception { + void verifyEquals(String[][] dimValues, IndexSearcher s, TestFacetResult expected, DrillSidewaysResult actual, + Map scores, boolean isSortedSetDV) throws Exception { if (VERBOSE) { System.out.println(" verify totHits=" + expected.hits.size()); } assertEquals(expected.hits.size(), actual.hits.totalHits); assertEquals(expected.hits.size(), actual.hits.scoreDocs.length); - for(int i=0;i actualValues = new HashMap<>(); + Map actualValues = new HashMap<>(); if (fr != null) { - for(LabelAndValue labelValue : fr.labelValues) { + for (LabelAndValue labelValue : fr.labelValues) { actualValues.put(labelValue.label, labelValue.value.intValue()); if (VERBOSE) { System.out.println(" " + idx + ": " + new BytesRef(labelValue.label) + ": " + labelValue.value); @@ -1026,10 +1065,11 @@ public class TestDrillSideways extends FacetTestCase { if (VERBOSE) { idx = 0; System.out.println(" expected (sorted)"); - for(int i=0;i drillSidewaysFacets = new HashMap<>(); + Facets drillDownFacets = getTaxonomyFacetCounts(taxoReader, config, drillDowns); + if (drillSideways != null) { + for (int i = 0; i < drillSideways.length; i++) { + drillSidewaysFacets.put(drillSidewaysDims[i], getTaxonomyFacetCounts(taxoReader, config, drillSideways[i])); + } + } + + if (drillSidewaysFacets.isEmpty()) { + return drillDownFacets; + } else { + return new MultiFacets(drillSidewaysFacets, drillDownFacets); + } + + } + }; + } + +} From f6fb6941bb62f8d47d653b2ed187ffa0107cd5c5 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 3 Jan 2017 06:47:47 -0500 Subject: [PATCH 82/83] LUCENE-6664: be more robust to broken token stream offsets --- .../analysis/synonym/FlattenGraphFilter.java | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java index 7ede190b61d..c1fa1f7cba1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FlattenGraphFilter.java @@ -17,22 +17,6 @@ package org.apache.lucene.analysis.synonym; -/** - * This filter "casts" token graphs down into a "flat" form, - * for indexing. This is an inherently lossy process: nodes (positions) - * along side paths are forcefully merged. - * - *

In general this means the output graph will accept token sequences
- * that the input graph did not accept, and will also fail to accept
- * token sequences that the input graph did accept.
- *
- * This is only necessary at indexing time because Lucene cannot yet index
- * an arbitrary token graph. At search time there are better options, e.g.
- * the experimental TermAutomatonQuery in sandbox.
- *
- * @lucene.experimental
- */
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -49,7 +33,12 @@ import org.apache.lucene.util.RollingBuffer;
  * Converts an incoming graph token stream, such as one from
  * {@link SynonymGraphFilter}, into a flat form so that
  * all nodes form a single linear chain with no side paths. Every
- * path through the graph touches every node.
+ * path through the graph touches every node. This is necessary
+ * when indexing a graph token stream, because the index does not
+ * save {@link PositionLengthAttribute} and so it cannot
+ * preserve the graph structure. However, at search time,
+ * query parsers can correctly handle the graph and this token
+ * filter should not be used.
  *
  *
    If the graph was not already flat to start, this * is likely a lossy process, i.e. it will often cause the @@ -234,7 +223,11 @@ public final class FlattenGraphFilter extends TokenFilter { // which would otherwise happen if the replacement has more tokens // than the input: int startOffset = Math.max(lastStartOffset, output.startOffset); - offsetAtt.setOffset(startOffset, outputEndNode.endOffset); + + // We must do this in case the incoming tokens have broken offsets: + int endOffset = Math.max(startOffset, outputEndNode.endOffset); + + offsetAtt.setOffset(startOffset, endOffset); lastStartOffset = startOffset; if (inputNode.nextOut == inputNode.tokens.size()) { @@ -382,7 +375,7 @@ public final class FlattenGraphFilter extends TokenFilter { // NOTE, shady: don't call super.end, because we did already from incrementToken } - clearAttributes(); + clearAttributes(); if (done) { // On exc, done is false, and we will not have set these: posIncAtt.setPositionIncrement(finalPosInc); From f3306786a13bda596a60cada45406e6f6a88724f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 30 Dec 2016 14:45:29 +0000 Subject: [PATCH 83/83] LUCENE-7612: Remove suggester dependency on misc --- dev-tools/idea/lucene/suggest/suggest.iml | 1 - lucene/CHANGES.txt | 3 +++ lucene/suggest/build.xml | 6 ++---- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev-tools/idea/lucene/suggest/suggest.iml b/dev-tools/idea/lucene/suggest/suggest.iml index ef2b8edf9fb..576ea8bff9b 100644 --- a/dev-tools/idea/lucene/suggest/suggest.iml +++ b/dev-tools/idea/lucene/suggest/suggest.iml @@ -15,7 +15,6 @@ - diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5b7a10c12f0..67d8ae5ccd7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -141,6 +141,9 @@ Bug Fixes * LUCENE-7606: Normalization with CustomAnalyzer would only apply the last token filter. (Adrien Grand) +* LUCENE-7612: Removed an unused dependency from the suggester to the misc + module. (Alan Woodward) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/suggest/build.xml b/lucene/suggest/build.xml index 26b316b31f8..5babe064d18 100644 --- a/lucene/suggest/build.xml +++ b/lucene/suggest/build.xml @@ -30,22 +30,20 @@ - - - - +
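For context on the LUCENE-6664 FlattenGraphFilter change above: its updated javadoc says the filter should sit at the end of an index-time analyzer chain and be omitted at search time. The sketch below shows that wiring under stated assumptions -- it is not part of the patch series, and the class name, tokenizer choice, field handling, and synonym entry are illustrative only; only the SynonymGraphFilter/FlattenGraphFilter/SynonymMap APIs from lucene-analyzers-common are taken as given.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.FlattenGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class FlattenGraphExample {

  // Index-time analyzer: SynonymGraphFilter may emit a token graph
  // (multi-word synonyms create side paths), so FlattenGraphFilter is
  // appended as the last filter before indexing.
  static Analyzer indexAnalyzer(final SynonymMap synonyms) {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        TokenStream stream = new SynonymGraphFilter(tokenizer, synonyms, true);
        stream = new FlattenGraphFilter(stream); // flatten only at index time
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
  }

  // Builds a tiny synonym map; the "wifi" -> "wireless network" entry is a
  // made-up example, not something taken from the patch.
  static SynonymMap buildSynonyms() throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("wifi"),
        SynonymMap.Builder.join(new String[] {"wireless", "network"}, new CharsRefBuilder()),
        true);
    return builder.build();
  }
}

At search time the same chain would be built without the FlattenGraphFilter, since, as the patched javadoc notes, query parsers can consume the graph produced by SynonymGraphFilter directly.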