diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d574a8a2861..5a6601b6048 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -107,6 +107,9 @@ Bug Fixes allTermsRequired is false and context filters are specified (Mike McCandless) +* LUCENE-7429: AnalyzerWrapper can now modify the normalization chain too and + DelegatingAnalyzerWrapper does the right thing automatically. (Adrien Grand) + Improvements * LUCENE-7439: FuzzyQuery now matches all terms within the specified diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java index b2de5e8b34b..466642c9f37 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java @@ -131,7 +131,7 @@ public final class CustomAnalyzer extends Analyzer { @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer tk = tokenizer.create(attributeFactory()); + final Tokenizer tk = tokenizer.create(attributeFactory(fieldName)); TokenStream ts = tk; for (final TokenFilterFactory filter : tokenFilters) { ts = filter.create(ts); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java index ea987315b07..4d0f03956f9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java @@ -85,7 +85,7 @@ public final class CollationKeyAnalyzer extends Analyzer { } @Override - protected AttributeFactory attributeFactory() { + protected AttributeFactory attributeFactory(String fieldName) { return factory; } diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java 
b/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java index aa4b42db6a0..3a5d41c999f 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java @@ -238,7 +238,7 @@ public abstract class Analyzer implements Closeable { throw new IllegalStateException("Normalization threw an unexpected exeption", e); } - final AttributeFactory attributeFactory = attributeFactory(); + final AttributeFactory attributeFactory = attributeFactory(fieldName); try (TokenStream ts = normalize(fieldName, new StringTokenStream(attributeFactory, filteredText, text.length()))) { final TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); @@ -286,9 +286,10 @@ public abstract class Analyzer implements Closeable { /** Return the {@link AttributeFactory} to be used for * {@link #tokenStream analysis} and - * {@link #normalize(String, String) normalization}. The default - * implementation returns {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY}. */ - protected AttributeFactory attributeFactory() { + * {@link #normalize(String, String) normalization} on the given + * {@code fieldName}. The default implementation returns + * {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY}. */ + protected AttributeFactory attributeFactory(String fieldName) { return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY; } diff --git a/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java b/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java index 1e5640f71c0..d23d004d729 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java @@ -19,6 +19,8 @@ package org.apache.lucene.analysis; import java.io.Reader; +import org.apache.lucene.util.AttributeFactory; + /** * Extension to {@link Analyzer} suitable for Analyzers which wrap * other Analyzers. 
@@ -81,6 +83,22 @@ public abstract class AnalyzerWrapper extends Analyzer { return components; } + /** + * Wraps / alters the given TokenStream for normalization purposes, taken + * from the wrapped Analyzer, to form new components. It is through this + * method that new TokenFilters can be added by AnalyzerWrappers. By default, + * the given token stream is returned. + * + * @param fieldName + * Name of the field which is to be analyzed + * @param in + * TokenStream taken from the wrapped Analyzer + * @return Wrapped / altered TokenStream. + */ + protected TokenStream wrapTokenStreamForNormalization(String fieldName, TokenStream in) { + return in; + } + /** * Wraps / alters the given Reader. Through this method AnalyzerWrappers can * implement {@link #initReader(String, Reader)}. By default, the given reader @@ -95,12 +113,32 @@ protected Reader wrapReader(String fieldName, Reader reader) { return reader; } - + + /** + * Wraps / alters the given Reader. Through this method AnalyzerWrappers can + * implement {@link #initReaderForNormalization(String, Reader)}. By default, + * the given reader is returned. 
+ * + * @param fieldName + * name of the field which is to be analyzed + * @param reader + * the reader to wrap + * @return the wrapped reader + */ + protected Reader wrapReaderForNormalization(String fieldName, Reader reader) { + return reader; + } + @Override protected final TokenStreamComponents createComponents(String fieldName) { return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName)); } + @Override + protected final TokenStream normalize(String fieldName, TokenStream in) { + return wrapTokenStreamForNormalization(fieldName, getWrappedAnalyzer(fieldName).normalize(fieldName, in)); + } + @Override public int getPositionIncrementGap(String fieldName) { return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName); @@ -115,4 +153,14 @@ public abstract class AnalyzerWrapper extends Analyzer { public final Reader initReader(String fieldName, Reader reader) { return getWrappedAnalyzer(fieldName).initReader(fieldName, wrapReader(fieldName, reader)); } + + @Override + protected final Reader initReaderForNormalization(String fieldName, Reader reader) { + return getWrappedAnalyzer(fieldName).initReaderForNormalization(fieldName, wrapReaderForNormalization(fieldName, reader)); + } + + @Override + protected final AttributeFactory attributeFactory(String fieldName) { + return getWrappedAnalyzer(fieldName).attributeFactory(fieldName); + } } diff --git a/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java b/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java index 6f05d4d49d8..edf5b2b43ea 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java @@ -54,12 +54,22 @@ public abstract class DelegatingAnalyzerWrapper extends AnalyzerWrapper { protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { return 
super.wrapComponents(fieldName, components); } - + + @Override + protected final TokenStream wrapTokenStreamForNormalization(String fieldName, TokenStream in) { + return super.wrapTokenStreamForNormalization(fieldName, in); + } + @Override protected final Reader wrapReader(String fieldName, Reader reader) { return super.wrapReader(fieldName, reader); } - + + @Override + protected final Reader wrapReaderForNormalization(String fieldName, Reader reader) { + return super.wrapReaderForNormalization(fieldName, reader); + } + private static final class DelegatingReuseStrategy extends ReuseStrategy { DelegatingAnalyzerWrapper wrapper; private final ReuseStrategy fallbackStrategy; diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java b/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java new file mode 100644 index 00000000000..1d6cf153aec --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.analysis; + +import java.io.IOException; +import java.io.Reader; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +public class TestDelegatingAnalyzerWrapper extends LuceneTestCase { + + public void testDelegatesNormalization() { + Analyzer analyzer1 = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + DelegatingAnalyzerWrapper w1 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) { + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return analyzer1; + } + }; + assertEquals(new BytesRef("Ab C"), w1.normalize("foo", "Ab C")); + + Analyzer analyzer2 = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true); + DelegatingAnalyzerWrapper w2 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) { + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return analyzer2; + } + }; + assertEquals(new BytesRef("ab c"), w2.normalize("foo", "Ab C")); + } + + public void testDelegatesAttributeFactory() throws Exception { + Analyzer analyzer1 = new MockBytesAnalyzer(); + DelegatingAnalyzerWrapper w1 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) { + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return analyzer1; + } + }; + assertEquals(new BytesRef("Ab C".getBytes(StandardCharsets.UTF_16LE)), w1.normalize("foo", "Ab C")); + } + + public void testDelegatesCharFilter() throws Exception { + Analyzer analyzer1 = new Analyzer() { + @Override + protected Reader initReaderForNormalization(String fieldName, Reader reader) { + return new DummyCharFilter(reader, 'b', 'z'); + } + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(attributeFactory(fieldName)); + return new TokenStreamComponents(tokenizer); + } + }; + DelegatingAnalyzerWrapper w1 = new 
DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) { + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return analyzer1; + } + }; + assertEquals(new BytesRef("az c"), w1.normalize("foo", "ab c")); + } + + private static class DummyCharFilter extends CharFilter { + + private final char match, repl; + + public DummyCharFilter(Reader input, char match, char repl) { + super(input); + this.match = match; + this.repl = repl; + } + + @Override + protected int correct(int currentOff) { + return currentOff; + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + final int read = input.read(cbuf, off, len); + for (int i = 0; i < read; ++i) { + if (cbuf[off+i] == match) { + cbuf[off+i] = repl; + } + } + return read; + } + + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java index b8cfc5be1d6..4d51717a0d2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java @@ -30,7 +30,7 @@ public final class MockBytesAnalyzer extends Analyzer { } @Override - protected AttributeFactory attributeFactory() { + protected AttributeFactory attributeFactory(String fieldName) { return MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY; } } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ae1d709775a..b1daf1b3200 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -170,6 +170,9 @@ New Features * SOLR-9559: Add ExecutorStream to execute stored Streaming Expressions (Joel Bernstein) +* SOLR-1085: Add support for MoreLikeThis queries and responses in SolrJ client. 
+ (Maurice Jumelet, Bill Mitchell, Cao Manh Dat via shalin) + Bug Fixes ---------------------- @@ -379,6 +382,10 @@ Other Changes * SOLR-9533: Reload core config when a core is reloaded (Gethin James, Joel Bernstein) +* SOLR-9371: Fix bin/solr calculations for start/stop wait time and RMI_PORT. + (Shawn Heisey via Erick Erickson) + + ================== 6.2.1 ================== Bug Fixes diff --git a/solr/bin/solr b/solr/bin/solr index 9d55e0a41f8..1d8edfa90f7 100755 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -119,6 +119,9 @@ else JAVA=java fi +if [ -z "$SOLR_STOP_WAIT" ]; then + SOLR_STOP_WAIT=180 +fi # test that Java exists, is executable and correct version JAVA_VER=$("$JAVA" -version 2>&1) if [[ $? -ne 0 ]] ; then @@ -231,7 +234,7 @@ function print_usage() { echo "" echo " -p Specify the port to start the Solr HTTP listener on; default is 8983" echo " The specified port (SOLR_PORT) will also be used to determine the stop port" - echo " STOP_PORT=(\$SOLR_PORT-1000) and JMX RMI listen port RMI_PORT=(1\$SOLR_PORT). " + echo " STOP_PORT=(\$SOLR_PORT-1000) and JMX RMI listen port RMI_PORT=(\$SOLR_PORT+10000). " echo " For instance, if you set -p 8985, then the STOP_PORT=7985 and RMI_PORT=18985" echo "" echo " -d Specify the Solr server directory; defaults to server" @@ -575,9 +578,24 @@ function stop_solr() { SOLR_PID="$4" if [ "$SOLR_PID" != "" ]; then - echo -e "Sending stop command to Solr running on port $SOLR_PORT ... waiting 5 seconds to allow Jetty process $SOLR_PID to stop gracefully." + echo -e "Sending stop command to Solr running on port $SOLR_PORT ... waiting up to $SOLR_STOP_WAIT seconds to allow Jetty process $SOLR_PID to stop gracefully." 
"$JAVA" $SOLR_SSL_OPTS $AUTHC_OPTS -jar "$DIR/start.jar" "STOP.PORT=$STOP_PORT" "STOP.KEY=$STOP_KEY" --stop || true - (sleep 5) & + (loops=0 + while true + do + CHECK_PID=`ps auxww | awk '{print $2}' | grep -w $SOLR_PID | sort -r | tr -d ' '` + if [ "$CHECK_PID" != "" ]; then + slept=$((loops * 2)) + if [ $slept -lt $SOLR_STOP_WAIT ]; then + sleep 2 + loops=$[$loops+1] + else + exit # subshell! + fi + else + exit # subshell! + fi + done) & spinner $! rm -f "$SOLR_PID_DIR/solr-$SOLR_PORT.pid" else @@ -1459,7 +1477,11 @@ fi if [ "$ENABLE_REMOTE_JMX_OPTS" == "true" ]; then if [ -z "$RMI_PORT" ]; then - RMI_PORT="1$SOLR_PORT" + RMI_PORT=`expr $SOLR_PORT + 10000` + if [ $RMI_PORT -gt 65535 ]; then + echo -e "\nRMI_PORT is $RMI_PORT, which is invalid!\n" + exit 1 + fi fi REMOTE_JMX_OPTS=('-Dcom.sun.management.jmxremote' \ @@ -1620,18 +1642,19 @@ function launch_solr() { # no lsof on cygwin though if hash lsof 2>/dev/null ; then # hash returns true if lsof is on the path - echo -n "Waiting up to 30 seconds to see Solr running on port $SOLR_PORT" + echo -n "Waiting up to $SOLR_STOP_WAIT seconds to see Solr running on port $SOLR_PORT" # Launch in a subshell to show the spinner (loops=0 while true do running=`lsof -PniTCP:$SOLR_PORT -sTCP:LISTEN` if [ -z "$running" ]; then - if [ $loops -lt 6 ]; then - sleep 5 + slept=$((loops * 2)) + if [ $slept -lt $SOLR_STOP_WAIT ]; then + sleep 2 loops=$[$loops+1] else - echo -e "Still not seeing Solr listening on $SOLR_PORT after 30 seconds!" + echo -e "Still not seeing Solr listening on $SOLR_PORT after $SOLR_STOP_WAIT seconds!" tail -30 "$SOLR_LOGS_DIR/solr.log" exit # subshell! fi diff --git a/solr/bin/solr.in.sh b/solr/bin/solr.in.sh index 40c59a6f1b9..a84c474eeea 100644 --- a/solr/bin/solr.in.sh +++ b/solr/bin/solr.in.sh @@ -21,6 +21,12 @@ # affecting other Java applications on your server/workstation. 
#SOLR_JAVA_HOME="" +# This controls the number of seconds that the solr script will wait for +# Solr to stop gracefully or Solr to start. If the graceful stop fails, +# the script will forcibly stop Solr. If the start fails, the script will +# give up waiting and display the last few lines of the logfile. +#SOLR_STOP_WAIT="180" + # Increase Java Heap as needed to support your indexing / query needs #SOLR_HEAP="512m" diff --git a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java index a5afbeccd8e..ab5458c4a72 100644 --- a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java +++ b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java @@ -99,7 +99,7 @@ public final class TokenizerChain extends SolrAnalyzer { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tk = tokenizer.create(attributeFactory()); + Tokenizer tk = tokenizer.create(attributeFactory(fieldName)); TokenStream ts = tk; for (TokenFilterFactory filter : filters) { ts = filter.create(ts); diff --git a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java index 6ccdd128563..7cf6d3975b7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java @@ -173,7 +173,7 @@ public class MoreLikeThisComponent extends SearchComponent { && rb.req.getParams().getBool(COMPONENT_NAME, false)) { Map tempResults = new LinkedHashMap<>(); - int mltcount = rb.req.getParams().getInt(MoreLikeThisParams.DOC_COUNT, 5); + int mltcount = rb.req.getParams().getInt(MoreLikeThisParams.DOC_COUNT, MoreLikeThisParams.DEFAULT_DOC_COUNT); String keyName = rb.req.getSchema().getUniqueKeyField().getName(); for (ShardRequest sreq : rb.finished) { diff --git 
a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 375019095ed..1975d8e58fc 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -3262,7 +3262,7 @@ public class SolrCLI { .create("m"), OptionBuilder .withDescription("Timeout in ms for commands supporting a timeout") - .withLongOpt("ms") + .withLongOpt("timeout") .hasArg(true) .withType(Long.class) .withArgName("ms") diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java index 7eee7be1304..e6d3d69f05c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java @@ -27,6 +27,7 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.MoreLikeThisParams; import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.params.TermsParams; @@ -801,6 +802,253 @@ public class SolrQuery extends ModifiableSolrParams return this; } + + /** + * Add field for MoreLikeThis. Automatically + * enables MoreLikeThis. + * + * @param field the names of the field to be added + * @return this + */ + public SolrQuery addMoreLikeThisField(String field) { + this.setMoreLikeThis(true); + return addValueToParam(MoreLikeThisParams.SIMILARITY_FIELDS, field); + } + + public SolrQuery setMoreLikeThisFields(String... 
fields) { + if( fields == null || fields.length == 0 ) { + this.remove( MoreLikeThisParams.SIMILARITY_FIELDS ); + this.setMoreLikeThis(false); + return this; + } + + StringBuilder sb = new StringBuilder(); + sb.append(fields[0]); + for (int i = 1; i < fields.length; i++) { + sb.append(','); + sb.append(fields[i]); + } + this.set(MoreLikeThisParams.SIMILARITY_FIELDS, sb.toString()); + this.setMoreLikeThis(true); + return this; + } + + /** + * @return an array with the fields used to compute similarity. + */ + public String[] getMoreLikeThisFields() { + String fl = this.get(MoreLikeThisParams.SIMILARITY_FIELDS); + if(fl==null || fl.length()==0) { + return null; + } + return fl.split(","); + } + + /** + * Sets the frequency below which terms will be ignored in the source doc + * + * @param mintf the minimum term frequency + * @return this + */ + public SolrQuery setMoreLikeThisMinTermFreq(int mintf) { + this.set(MoreLikeThisParams.MIN_TERM_FREQ, mintf); + return this; + } + + /** + * Gets the frequency below which terms will be ignored in the source doc + */ + public int getMoreLikeThisMinTermFreq() { + return this.getInt(MoreLikeThisParams.MIN_TERM_FREQ, 2); + } + + /** + * Sets the frequency at which words will be ignored which do not occur in + * at least this many docs. + * + * @param mindf the minimum document frequency + * @return this + */ + public SolrQuery setMoreLikeThisMinDocFreq(int mindf) { + this.set(MoreLikeThisParams.MIN_DOC_FREQ, mindf); + return this; + } + + /** + * Gets the frequency at which words will be ignored which do not occur in + * at least this many docs. + */ + public int getMoreLikeThisMinDocFreq() { + return this.getInt(MoreLikeThisParams.MIN_DOC_FREQ, 5); + } + + /** + * Sets the minimum word length below which words will be ignored. 
+ * + * @param minwl the minimum word length + * @return this + */ + public SolrQuery setMoreLikeThisMinWordLen(int minwl) { + this.set(MoreLikeThisParams.MIN_WORD_LEN, minwl); + return this; + } + + /** + * Gets the minimum word length below which words will be ignored. + */ + public int getMoreLikeThisMinWordLen() { + return this.getInt(MoreLikeThisParams.MIN_WORD_LEN, 0); + } + + /** + * Sets the maximum word length above which words will be ignored. + * + * @param maxwl the maximum word length + * @return this + */ + public SolrQuery setMoreLikeThisMaxWordLen(int maxwl) { + this.set(MoreLikeThisParams.MAX_WORD_LEN, maxwl); + return this; + } + + /** + * Gets the maximum word length above which words will be ignored. + */ + public int getMoreLikeThisMaxWordLen() { + return this.getInt(MoreLikeThisParams.MAX_WORD_LEN, 0); + } + + /** + * Sets the maximum number of query terms that will be included in any + * generated query. + * + * @param maxqt the maximum number of query terms + * @return this + */ + public SolrQuery setMoreLikeThisMaxQueryTerms(int maxqt) { + this.set(MoreLikeThisParams.MAX_QUERY_TERMS, maxqt); + return this; + } + + /** + * Gets the maximum number of query terms that will be included in any + * generated query. + */ + public int getMoreLikeThisMaxQueryTerms() { + return this.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, 25); + } + + /** + * Sets the maximum number of tokens to parse in each example doc field + * that is not stored with TermVector support. + * + * @param maxntp the maximum number of tokens to parse + * @return this + */ + public SolrQuery setMoreLikeThisMaxTokensParsed(int maxntp) { + this.set(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, maxntp); + return this; + } + + /** + * Gets the maximum number of tokens to parse in each example doc field + * that is not stored with TermVector support. 
+ */ + public int getMoreLikeThisMaxTokensParsed() { + return this.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, 5000); + } + + /** + * Sets if the query will be boosted by the interesting term relevance. + * + * @param b set to true to boost the query with the interesting term relevance + * @return this + */ + public SolrQuery setMoreLikeThisBoost(boolean b) { + this.set(MoreLikeThisParams.BOOST, b); + return this; + } + + /** + * Gets if the query will be boosted by the interesting term relevance. + */ + public boolean getMoreLikeThisBoost() { + return this.getBool(MoreLikeThisParams.BOOST, false); + } + + /** + * Sets the query fields and their boosts using the same format as that + * used in DisMaxQParserPlugin. These fields must also be added + * using {@link #addMoreLikeThisField(String)}. + * + * @param qf the query fields + * @return this + */ + public SolrQuery setMoreLikeThisQF(String qf) { + this.set(MoreLikeThisParams.QF, qf); + return this; + } + + /** + * Gets the query fields and their boosts. + */ + public String getMoreLikeThisQF() { + return this.get(MoreLikeThisParams.QF); + } + + /** + * Sets the number of similar documents to return for each result. + * + * @param count the number of similar documents to return for each result + * @return this + */ + public SolrQuery setMoreLikeThisCount(int count) { + this.set(MoreLikeThisParams.DOC_COUNT, count); + return this; + } + + /** + * Gets the number of similar documents to return for each result. + */ + public int getMoreLikeThisCount() { + return this.getInt(MoreLikeThisParams.DOC_COUNT, MoreLikeThisParams.DEFAULT_DOC_COUNT); + } + + /** + * Enable/Disable MoreLikeThis. After enabling MoreLikeThis, the fields + * used for computing similarity must be specified calling + * {@link #addMoreLikeThisField(String)}. + * + * @param b flag to indicate if MoreLikeThis should be enabled. 
if b==false + * removes all mlt.* parameters + * @return this + */ + public SolrQuery setMoreLikeThis(boolean b) { + if(b) { + this.set(MoreLikeThisParams.MLT, true); + } else { + this.remove(MoreLikeThisParams.MLT); + this.remove(MoreLikeThisParams.SIMILARITY_FIELDS); + this.remove(MoreLikeThisParams.MIN_TERM_FREQ); + this.remove(MoreLikeThisParams.MIN_DOC_FREQ); + this.remove(MoreLikeThisParams.MIN_WORD_LEN); + this.remove(MoreLikeThisParams.MAX_WORD_LEN); + this.remove(MoreLikeThisParams.MAX_QUERY_TERMS); + this.remove(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED); + this.remove(MoreLikeThisParams.BOOST); + this.remove(MoreLikeThisParams.QF); + this.remove(MoreLikeThisParams.DOC_COUNT); + } + return this; + } + + /** + * @return true if MoreLikeThis is enabled, false otherwise + */ + public boolean getMoreLikeThis() { + return this.getBool(MoreLikeThisParams.MLT, false); + } + public SolrQuery setFields(String ... fields) { if( fields == null || fields.length == 0 ) { this.remove( CommonParams.FL ); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java index debb079e9e5..eb595aaad1c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java @@ -51,6 +51,7 @@ public class QueryResponse extends SolrResponseBase private Map> _suggestInfo = null; private NamedList _statsInfo = null; private NamedList> _termsInfo = null; + private NamedList _moreLikeThisInfo = null; private String _cursorMarkNext = null; // Grouping response @@ -168,6 +169,9 @@ public class QueryResponse extends SolrResponseBase _termsInfo = (NamedList>) res.getVal( i ); extractTermsInfo( _termsInfo ); } + else if ( "moreLikeThis".equals( n ) ) { + _moreLikeThisInfo = (NamedList) res.getVal( i ); + } else if ( CursorMarkParams.CURSOR_MARK_NEXT.equals( n ) ) { _cursorMarkNext = 
(String) res.getVal( i ); } @@ -547,6 +551,10 @@ public class QueryResponse extends SolrResponseBase public TermsResponse getTermsResponse() { return _termsResponse; } + + public NamedList getMoreLikeThis() { + return _moreLikeThisInfo; + } /** * See also: {@link #getLimitingFacets()} diff --git a/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java b/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java index b41cbfdddda..c898fdb8dae 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java @@ -50,6 +50,9 @@ public interface MoreLikeThisParams // Do you want to include the original document in the results or not public final static String INTERESTING_TERMS = PREFIX + "interestingTerms"; // false,details,(list or true) + + // the default doc count + public final static int DEFAULT_DOC_COUNT = 5; public enum TermStyle { NONE, diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index 0f91adfe162..88227ba03f5 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -1996,37 +1996,38 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase // test with mlt.fl having comma separated values SolrQuery q = new SolrQuery("*:*"); q.setRows(20); - q.setParam("mlt", "true"); - q.setParam("mlt.mintf", "0"); - q.setParam("mlt.count", "2"); - q.setParam("mlt.fl", "x_s,y_s,z_s"); + q.setMoreLikeThisFields("x_s", "y_s", "z_s"); + q.setMoreLikeThisMinTermFreq(0); + q.setMoreLikeThisCount(2); QueryResponse response = client.query(q); assertEquals(20, response.getResults().getNumFound()); - NamedList moreLikeThis = (NamedList) response.getResponse().get("moreLikeThis"); + NamedList moreLikeThis = 
response.getMoreLikeThis(); assertNotNull("MoreLikeThis response should not have been null", moreLikeThis); for (int i=0; i<20; i++) { String id = "testMoreLikeThis" + i; - SolrDocumentList mltResp = (SolrDocumentList) moreLikeThis.get(id); + SolrDocumentList mltResp = moreLikeThis.get(id); assertNotNull("MoreLikeThis response for id=" + id + " should not be null", mltResp); assertTrue("MoreLikeThis response for id=" + id + " had numFound=0", mltResp.getNumFound() > 0); + assertTrue("MoreLikeThis response for id=" + id + " had not returned exactly 2 documents", mltResp.size() == 2); } // now test with multiple mlt.fl parameters q = new SolrQuery("*:*"); q.setRows(20); q.setParam("mlt", "true"); - q.setParam("mlt.mintf", "0"); - q.setParam("mlt.count", "2"); q.setParam("mlt.fl", "x_s", "y_s", "z_s"); + q.setMoreLikeThisMinTermFreq(0); + q.setMoreLikeThisCount(2); response = client.query(q); assertEquals(20, response.getResults().getNumFound()); - moreLikeThis = (NamedList) response.getResponse().get("moreLikeThis"); + moreLikeThis = response.getMoreLikeThis(); assertNotNull("MoreLikeThis response should not have been null", moreLikeThis); for (int i=0; i<20; i++) { String id = "testMoreLikeThis" + i; - SolrDocumentList mltResp = (SolrDocumentList) moreLikeThis.get(id); + SolrDocumentList mltResp = moreLikeThis.get(id); assertNotNull("MoreLikeThis response for id=" + id + " should not be null", mltResp); assertTrue("MoreLikeThis response for id=" + id + " had numFound=0", mltResp.getNumFound() > 0); + assertTrue("MoreLikeThis response for id=" + id + " had not returned exactly 2 documents", mltResp.size() == 2); } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java index 816a2cca18a..d27847f6803 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java @@ -431,4 +431,29 @@ 
public class SolrQueryTest extends LuceneTestCase { assertNull(solrQuery.getParams("f.field3.facet.interval.set")); } + + public void testMoreLikeThis() { + SolrQuery solrQuery = new SolrQuery(); + solrQuery.addMoreLikeThisField("mlt1"); + assertTrue(solrQuery.getMoreLikeThis()); + + solrQuery.addMoreLikeThisField("mlt2"); + solrQuery.addMoreLikeThisField("mlt3"); + solrQuery.addMoreLikeThisField("mlt4"); + assertEquals(4, solrQuery.getMoreLikeThisFields().length); + solrQuery.setMoreLikeThisFields(null); + assertTrue(null == solrQuery.getMoreLikeThisFields()); + assertFalse(solrQuery.getMoreLikeThis()); + + assertEquals(true, solrQuery.setMoreLikeThisBoost(true).getMoreLikeThisBoost()); + assertEquals("qf", solrQuery.setMoreLikeThisQF("qf").getMoreLikeThisQF()); + assertEquals(10, solrQuery.setMoreLikeThisMaxTokensParsed(10).getMoreLikeThisMaxTokensParsed()); + assertEquals(11, solrQuery.setMoreLikeThisMinTermFreq(11).getMoreLikeThisMinTermFreq()); + assertEquals(12, solrQuery.setMoreLikeThisMinDocFreq(12).getMoreLikeThisMinDocFreq()); + assertEquals(13, solrQuery.setMoreLikeThisMaxWordLen(13).getMoreLikeThisMaxWordLen()); + assertEquals(14, solrQuery.setMoreLikeThisMinWordLen(14).getMoreLikeThisMinWordLen()); + assertEquals(15, solrQuery.setMoreLikeThisMaxQueryTerms(15).getMoreLikeThisMaxQueryTerms()); + assertEquals(16, solrQuery.setMoreLikeThisCount(16).getMoreLikeThisCount()); + + } }