From 7dc025bdced2de84d6dcdc9e34589429e00fbcbd Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 12 Dec 2011 17:28:09 +0000 Subject: [PATCH 1/8] LUCENE-3642: fix invalid offsets from CharTokenizer, [Edge]NGramFilters, SmartChinese, add sanity check to BaseTokenStreamTestCase git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213329 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../analysis/BaseTokenStreamTestCase.java | 4 + .../analysis/ngram/EdgeNGramTokenFilter.java | 12 +- .../analysis/ngram/NGramTokenFilter.java | 12 +- .../lucene/analysis/util/CharTokenizer.java | 10 +- .../analysis/core/TestDuelingAnalyzers.java | 123 ++++++++++++++++++ .../ngram/EdgeNGramTokenFilterTest.java | 25 ++++ .../analysis/ngram/NGramTokenFilterTest.java | 25 ++++ .../analysis/util/TestCharTokenizers.java | 82 ++++++++++++ .../analysis/cn/smart/WordTokenFilter.java | 15 ++- .../cn/smart/TestSmartChineseAnalyzer.java | 23 ++++ 11 files changed, 329 insertions(+), 6 deletions(-) create mode 100644 modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2d679c8c267..ee5f23aa6be 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -731,6 +731,10 @@ Bug fixes * LUCENE-3641: Fixed MultiReader to correctly propagate readerFinishedListeners to clones/reopened readers. (Uwe Schindler) +* LUCENE-3642: Fixed bugs in CharTokenizer, n-gram filters, and smart chinese + where they would create invalid offsets in some situations, leading to problems + in highlighting. (Max Beutel via Robert Muir) + Documentation * LUCENE-3597: Fixed incorrect grouping documentation. (Martijn van Groningen, Robert Muir) diff --git a/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index 7f14aca29aa..e7fe8a2628a 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -135,6 +135,10 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { assertTrue("startOffset must be >= 0", offsetAtt.startOffset() >= 0); assertTrue("endOffset must be >= 0", offsetAtt.endOffset() >= 0); assertTrue("endOffset must be >= startOffset", offsetAtt.endOffset() >= offsetAtt.startOffset()); + if (finalOffset != null) { + assertTrue("startOffset must be <= finalOffset", offsetAtt.startOffset() <= finalOffset.intValue()); + assertTrue("endOffset must be <= finalOffset", offsetAtt.endOffset() <= finalOffset.intValue()); + } } if (posIncrAtt != null) { assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java index 55fa29b777c..f43a6d7d929 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java @@ -71,6 +71,8 @@ public final class EdgeNGramTokenFilter extends TokenFilter { private int curTermLength; private int curGramSize; private int tokStart; + private int tokEnd; // only used if the length changed before this filter + private boolean hasIllegalOffsets; // only if the length changed before 
this filter private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @@ -126,6 +128,10 @@ public final class EdgeNGramTokenFilter extends TokenFilter { curTermLength = termAtt.length(); curGramSize = minGram; tokStart = offsetAtt.startOffset(); + tokEnd = offsetAtt.endOffset(); + // if length by start + end offsets doesn't match the term text then assume + // this is a synonym and don't adjust the offsets. + hasIllegalOffsets = (tokStart + curTermLength) != tokEnd; } } if (curGramSize <= maxGram) { @@ -135,7 +141,11 @@ public final class EdgeNGramTokenFilter extends TokenFilter { int start = side == Side.FRONT ? 0 : curTermLength - curGramSize; int end = start + curGramSize; clearAttributes(); - offsetAtt.setOffset(tokStart + start, tokStart + end); + if (hasIllegalOffsets) { + offsetAtt.setOffset(tokStart, tokEnd); + } else { + offsetAtt.setOffset(tokStart + start, tokStart + end); + } termAtt.copyBuffer(curTermBuffer, start, curGramSize); curGramSize++; return true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java index c73208bf36b..ae8735035a2 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java @@ -38,6 +38,8 @@ public final class NGramTokenFilter extends TokenFilter { private int curGramSize; private int curPos; private int tokStart; + private int tokEnd; // only used if the length changed before this filter + private boolean hasIllegalOffsets; // only if the length changed before this filter private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @@ -81,13 +83,21 @@ public final class NGramTokenFilter extends TokenFilter { curGramSize = minGram; curPos = 0; tokStart = offsetAtt.startOffset(); + tokEnd = offsetAtt.endOffset(); + // if length by start + end offsets doesn't match the term text then assume + // this is a synonym and don't adjust the offsets. 
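+      // e.g. ASCIIFoldingFilter folds "æ" (one character of the original text) into
+      // "ae" (two characters of term text), so tokStart + curTermLength would run past tokEnd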
+ hasIllegalOffsets = (tokStart + curTermLength) != tokEnd; } } while (curGramSize <= maxGram) { while (curPos+curGramSize <= curTermLength) { // while there is input clearAttributes(); termAtt.copyBuffer(curTermBuffer, curPos, curGramSize); - offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize); + if (hasIllegalOffsets) { + offsetAtt.setOffset(tokStart, tokEnd); + } else { + offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize); + } curPos++; return true; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java index 5d91a3a3fe1..f1899ff4842 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java @@ -144,6 +144,7 @@ public abstract class CharTokenizer extends Tokenizer { clearAttributes(); int length = 0; int start = -1; // this variable is always initialized + int end = -1; char[] buffer = termAtt.buffer(); while (true) { if (bufferIndex >= dataLen) { @@ -162,15 +163,18 @@ public abstract class CharTokenizer extends Tokenizer { } // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex); - bufferIndex += Character.charCount(c); + final int charCount = Character.charCount(c); + bufferIndex += charCount; if (isTokenChar(c)) { // if it's a token char if (length == 0) { // start of token assert start == -1; - start = offset + bufferIndex - 1; + start = offset + bufferIndex - charCount; + end = start; } else if (length >= buffer.length-1) { // check if a supplementary could run out of bounds buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer } + end += charCount; length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test break; @@ -180,7 +184,7 @@ public abstract class CharTokenizer extends Tokenizer { termAtt.setLength(length); assert start != -1; - offsetAtt.setOffset(correctOffset(start), finalOffset = correctOffset(start+length)); + offsetAtt.setOffset(correctOffset(start), finalOffset = correctOffset(end)); return true; } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java new file mode 100644 index 00000000000..8b5c691ef07 --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java @@ -0,0 +1,123 @@ +package org.apache.lucene.analysis.core; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.BasicOperations; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.State; +import org.apache.lucene.util.automaton.Transition; + +/** + * Compares MockTokenizer (which is simple with no optimizations) with equivalent + * core tokenizers (that have optimizations like buffering). + * + * Any tests here need to probably consider unicode version of the JRE (it could + * cause false fails). + */ +public class TestDuelingAnalyzers extends LuceneTestCase { + private CharacterRunAutomaton jvmLetter; + + public void setUp() throws Exception { + super.setUp(); + // build an automaton matching this jvm's letter definition + State initial = new State(); + State accept = new State(); + accept.setAccept(true); + for (int i = 0; i <= 0x10FFFF; i++) { + if (Character.isLetter(i)) { + initial.addTransition(new Transition(i, i, accept)); + } + } + Automaton single = new Automaton(initial); + single.reduce(); + Automaton repeat = BasicOperations.repeat(single); + jvmLetter = new CharacterRunAutomaton(repeat); + } + + public void testLetterAscii() throws Exception { + Analyzer left = new MockAnalyzer(random, jvmLetter, false); + Analyzer right = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + }; + for (int i = 0; i < 10000; i++) { + String s = _TestUtil.randomSimpleString(random); + assertEquals(s, left.tokenStream("foo", new StringReader(s)), + right.tokenStream("foo", new StringReader(s))); + } + } + + public void testLetterUnicode() throws Exception { + Analyzer left = new MockAnalyzer(random, jvmLetter, false); + Analyzer right = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + }; + for (int i = 0; i < 10000; i++) { + String s = _TestUtil.randomUnicodeString(random); + assertEquals(s, left.tokenStream("foo", new StringReader(s)), + right.tokenStream("foo", new StringReader(s))); + } + } + + // we only check a few core attributes here. 
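+  // (term text, offsets and position increments; other attributes such as type are not compared)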
+ // TODO: test other things + public void assertEquals(String s, TokenStream left, TokenStream right) throws Exception { + left.reset(); + right.reset(); + CharTermAttribute leftTerm = left.addAttribute(CharTermAttribute.class); + CharTermAttribute rightTerm = right.addAttribute(CharTermAttribute.class); + OffsetAttribute leftOffset = left.addAttribute(OffsetAttribute.class); + OffsetAttribute rightOffset = right.addAttribute(OffsetAttribute.class); + PositionIncrementAttribute leftPos = left.addAttribute(PositionIncrementAttribute.class); + PositionIncrementAttribute rightPos = right.addAttribute(PositionIncrementAttribute.class); + + while (left.incrementToken()) { + assertTrue("wrong number of tokens for input: " + s, right.incrementToken()); + assertEquals("wrong term text for input: " + s, leftTerm.toString(), rightTerm.toString()); + assertEquals("wrong position for input: " + s, leftPos.getPositionIncrement(), rightPos.getPositionIncrement()); + assertEquals("wrong start offset for input: " + s, leftOffset.startOffset(), rightOffset.startOffset()); + assertEquals("wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset()); + }; + assertFalse("wrong number of tokens for input: " + s, right.incrementToken()); + left.end(); + right.end(); + assertEquals("wrong final offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset()); + left.close(); + right.close(); + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java index 6af99c91164..a88f23ce755 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java @@ -17,11 +17,16 @@ package org.apache.lucene.analysis.ngram; * limitations under the License. 
*/ +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; +import java.io.Reader; import java.io.StringReader; /** @@ -104,4 +109,24 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { tokenizer.reset(new StringReader("abcde")); assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}); } + + // LUCENE-3642 + // EdgeNgram blindly adds term length to offset, but this can take things out of bounds + // wrt original text if a previous filter increases the length of the word (in this case æ -> ae) + // so in this case we behave like WDF, and preserve any modified offsets + public void testInvalidOffsets() throws Exception { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + TokenFilter filters = new ASCIIFoldingFilter(tokenizer); + filters = new EdgeNGramTokenFilter(filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15); + return new TokenStreamComponents(tokenizer, filters); + } + }; + assertAnalyzesTo(analyzer, "mosfellsbær", + new String[] { "mo", "mos", "mosf", "mosfe", "mosfel", "mosfell", "mosfells", "mosfellsb", "mosfellsba", "mosfellsbae", "mosfellsbaer" }, + new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java index ef5c970adb8..6b12775bb9e 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java @@ -17,11 +17,16 @@ package org.apache.lucene.analysis.ngram; * limitations under the License. 
*/ +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; +import java.io.Reader; import java.io.StringReader; /** @@ -93,4 +98,24 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { tokenizer.reset(new StringReader("abcde")); assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}); } + + // LUCENE-3642 + // EdgeNgram blindly adds term length to offset, but this can take things out of bounds + // wrt original text if a previous filter increases the length of the word (in this case æ -> ae) + // so in this case we behave like WDF, and preserve any modified offsets + public void testInvalidOffsets() throws Exception { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + TokenFilter filters = new ASCIIFoldingFilter(tokenizer); + filters = new NGramTokenFilter(filters, 2, 2); + return new TokenStreamComponents(tokenizer, filters); + } + }; + assertAnalyzesTo(analyzer, "mosfellsbær", + new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" }, + new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java index f129596df92..c8963a2fb39 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java @@ -18,11 +18,17 @@ package org.apache.lucene.analysis.util; */ import java.io.IOException; +import java.io.Reader; import java.io.StringReader; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LetterTokenizer; import org.apache.lucene.analysis.core.LowerCaseTokenizer; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util._TestUtil; /** @@ -94,4 +100,80 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()}); } + + // LUCENE-3642: normalize SMP->BMP and check that offsets are correct + public void testCrossPlaneNormalization() throws IOException { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) { + @Override + protected int normalize(int c) { + if (c > 0xffff) { + return 'δ'; + } else { + return c; + } + } + }; + return new TokenStreamComponents(tokenizer, 
tokenizer); + } + }; + int num = 10000 * RANDOM_MULTIPLIER; + for (int i = 0; i < num; i++) { + String s = _TestUtil.randomUnicodeString(random); + TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + ts.reset(); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + while (ts.incrementToken()) { + String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset()); + for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) { + cp = highlightedText.codePointAt(j); + assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp)); + } + } + ts.end(); + ts.close(); + } + // just for fun + checkRandomData(random, analyzer, num); + } + + // LUCENE-3642: normalize BMP->SMP and check that offsets are correct + public void testCrossPlaneNormalization2() throws IOException { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) { + @Override + protected int normalize(int c) { + if (c <= 0xffff) { + return 0x1043C; + } else { + return c; + } + } + }; + return new TokenStreamComponents(tokenizer, tokenizer); + } + }; + int num = 10000 * RANDOM_MULTIPLIER; + for (int i = 0; i < num; i++) { + String s = _TestUtil.randomUnicodeString(random); + TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + ts.reset(); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + while (ts.incrementToken()) { + String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset()); + for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) { + cp = highlightedText.codePointAt(j); + assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp)); + } + } + ts.end(); + ts.close(); + } + // just for fun + checkRandomData(random, analyzer, num); + } } diff --git a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java index 6f0ecea5dd3..fa871b2f6d0 100644 --- a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java +++ b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java @@ -43,6 +43,10 @@ public final class WordTokenFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + + private int tokStart; // only used if the length changed before this filter + private int tokEnd; // only used if the length changed before this filter + private boolean hasIllegalOffsets; // only if the length changed before this filter /** * Construct a new WordTokenizer. @@ -59,6 +63,11 @@ public final class WordTokenFilter extends TokenFilter { if (tokenIter == null || !tokenIter.hasNext()) { // there are no remaining tokens from the current sentence... are there more sentences? if (input.incrementToken()) { + tokStart = offsetAtt.startOffset(); + tokEnd = offsetAtt.endOffset(); + // if length by start + end offsets doesn't match the term text then assume + // this is a synonym and don't adjust the offsets. + hasIllegalOffsets = (tokStart + termAtt.length()) != tokEnd; // a new sentence is available: process it. 
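// (segmentSentence is given the sentence's original startOffset, so the resulting SegTokens carry absolute offsets into the input)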
tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset()); tokenIter = tokenBuffer.iterator(); @@ -77,7 +86,11 @@ public final class WordTokenFilter extends TokenFilter { // There are remaining tokens from the current sentence, return the next one. SegToken nextWord = tokenIter.next(); termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length); - offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset); + if (hasIllegalOffsets) { + offsetAtt.setOffset(tokStart, tokEnd); + } else { + offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset); + } typeAtt.setType("word"); return true; } diff --git a/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java b/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java index 41644f349d7..78fe87f8691 100644 --- a/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java +++ b/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java @@ -17,11 +17,16 @@ package org.apache.lucene.analysis.cn.smart; +import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; import org.apache.lucene.util.Version; public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { @@ -196,6 +201,24 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { } } + // LUCENE-3642 + public void testInvalidOffset() throws Exception { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + TokenFilter filters = new ASCIIFoldingFilter(tokenizer); + filters = new WordTokenFilter(filters); + return new TokenStreamComponents(tokenizer, filters); + } + }; + + assertAnalyzesTo(analyzer, "mosfellsbær", + new String[] { "mosfellsbaer" }, + new int[] { 0 }, + new int[] { 11 }); + } + /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); From 4da73b1f9e087d88c07cddcb79bf8c88ab10ada4 Mon Sep 17 00:00:00 2001 From: "Chris M. 
Hostetter" Date: Mon, 12 Dec 2011 22:31:51 +0000 Subject: [PATCH 2/8] SOLR-2920: Refactor frequent conditional use of DefaultSolrParams and AppendedSolrParams into factory methods git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213474 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 4 ++++ .../apache/solr/highlight/GapFragmenter.java | 5 +---- .../apache/solr/highlight/HtmlFormatter.java | 7 ++----- .../solr/highlight/RegexFragmenter.java | 8 +++----- .../solr/highlight/SimpleFragListBuilder.java | 10 +++++----- .../solr/highlight/SingleFragListBuilder.java | 10 +++++----- .../solr/highlight/SolrBoundaryScanner.java | 6 ++---- .../solr/highlight/SolrFragmentsBuilder.java | 11 ++++------ .../org/apache/solr/search/DisMaxQParser.java | 4 ++-- .../search/ExtendedDismaxQParserPlugin.java | 6 +----- .../org/apache/solr/util/SolrPluginUtils.java | 17 +++++----------- .../apache/solr/BasicFunctionalityTest.java | 6 ++---- .../solrj/impl/CommonsHttpSolrServer.java | 16 +++++---------- .../common/params/AppendedSolrParams.java | 5 +++++ .../solr/common/params/DefaultSolrParams.java | 5 +++++ .../apache/solr/common/params/SolrParams.java | 20 ++++++++++++++++++- .../solr/common/params/SolrParamTest.java | 5 +---- 17 files changed, 71 insertions(+), 74 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d7826945327..4f460c5ae08 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -429,6 +429,10 @@ Other Changes ---------------------- * SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. (koji) +* SOLR-2920: Refactor frequent conditional use of DefaultSolrParams and + AppendedSolrParams into factory methods. + (David Smiley via hossman) + ================== 3.5.0 ================== New Features diff --git a/solr/core/src/java/org/apache/solr/highlight/GapFragmenter.java b/solr/core/src/java/org/apache/solr/highlight/GapFragmenter.java index 765316b864b..eaca4c818ec 100644 --- a/solr/core/src/java/org/apache/solr/highlight/GapFragmenter.java +++ b/solr/core/src/java/org/apache/solr/highlight/GapFragmenter.java @@ -22,7 +22,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.NullFragmenter; import org.apache.lucene.search.highlight.SimpleFragmenter; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; @@ -31,9 +30,7 @@ public class GapFragmenter extends HighlightingPluginBase implements SolrFragmen public Fragmenter getFragmenter(String fieldName, SolrParams params ) { numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } + params = SolrParams.wrapDefaults(params, defaults); int fragsize = params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ); return (fragsize <= 0) ? 
new NullFragmenter() : new LuceneGapFragmenter(fragsize); diff --git a/solr/core/src/java/org/apache/solr/highlight/HtmlFormatter.java b/solr/core/src/java/org/apache/solr/highlight/HtmlFormatter.java index eda5ecf8229..c8eaa2287cd 100644 --- a/solr/core/src/java/org/apache/solr/highlight/HtmlFormatter.java +++ b/solr/core/src/java/org/apache/solr/highlight/HtmlFormatter.java @@ -18,7 +18,6 @@ package org.apache.solr.highlight; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; @@ -30,10 +29,8 @@ public class HtmlFormatter extends HighlightingPluginBase implements SolrFormatt public Formatter getFormatter(String fieldName, SolrParams params ) { numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } - + params = SolrParams.wrapDefaults(params, defaults); + return new SimpleHTMLFormatter( params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "" ), params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "")); diff --git a/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java b/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java index a958d66a9ab..ff48f79fd63 100644 --- a/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java +++ b/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java @@ -26,7 +26,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.NullFragmenter; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -66,10 +65,9 @@ public class RegexFragmenter extends HighlightingPluginBase implements SolrFragm public Fragmenter getFragmenter(String fieldName, SolrParams params ) { - numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } + numRequests++; + params = SolrParams.wrapDefaults(params, defaults); + int fragsize = params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE ); int increment = params.getFieldInt( fieldName, HighlightParams.INCREMENT, LuceneRegexFragmenter.DEFAULT_INCREMENT_GAP ); float slop = params.getFieldFloat( fieldName, HighlightParams.SLOP, LuceneRegexFragmenter.DEFAULT_SLOP ); diff --git a/solr/core/src/java/org/apache/solr/highlight/SimpleFragListBuilder.java b/solr/core/src/java/org/apache/solr/highlight/SimpleFragListBuilder.java index 1547d0c3087..5de44128f21 100644 --- a/solr/core/src/java/org/apache/solr/highlight/SimpleFragListBuilder.java +++ b/solr/core/src/java/org/apache/solr/highlight/SimpleFragListBuilder.java @@ -18,18 +18,18 @@ package org.apache.solr.highlight; import org.apache.lucene.search.vectorhighlight.FragListBuilder; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.SolrParams; public class SimpleFragListBuilder extends HighlightingPluginBase implements SolrFragListBuilder { public FragListBuilder getFragListBuilder(SolrParams params) { + // NOTE: This class (currently) makes no use of params + // If that ever changes, it should wrap them with 
defaults... + // params = SolrParams.wrapDefaults(params, defaults) + numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } - + return new org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder(); } diff --git a/solr/core/src/java/org/apache/solr/highlight/SingleFragListBuilder.java b/solr/core/src/java/org/apache/solr/highlight/SingleFragListBuilder.java index 9bba3f4ecd3..b484398f3ec 100644 --- a/solr/core/src/java/org/apache/solr/highlight/SingleFragListBuilder.java +++ b/solr/core/src/java/org/apache/solr/highlight/SingleFragListBuilder.java @@ -18,18 +18,18 @@ package org.apache.solr.highlight; import org.apache.lucene.search.vectorhighlight.FragListBuilder; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.SolrParams; public class SingleFragListBuilder extends HighlightingPluginBase implements SolrFragListBuilder { public FragListBuilder getFragListBuilder(SolrParams params) { + // NOTE: This class (currently) makes no use of params + // If that ever changes, it should wrap them with defaults... + // params = SolrParams.wrapDefaults(params, defaults) + numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } - + return new org.apache.lucene.search.vectorhighlight.SingleFragListBuilder(); } diff --git a/solr/core/src/java/org/apache/solr/highlight/SolrBoundaryScanner.java b/solr/core/src/java/org/apache/solr/highlight/SolrBoundaryScanner.java index f7ccd165447..fd0aa015b13 100644 --- a/solr/core/src/java/org/apache/solr/highlight/SolrBoundaryScanner.java +++ b/solr/core/src/java/org/apache/solr/highlight/SolrBoundaryScanner.java @@ -18,7 +18,6 @@ package org.apache.solr.highlight; import org.apache.lucene.search.vectorhighlight.BoundaryScanner; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.util.plugin.NamedListInitializedPlugin; @@ -28,9 +27,8 @@ public abstract class SolrBoundaryScanner extends HighlightingPluginBase impleme public BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){ numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } + params = SolrParams.wrapDefaults(params, defaults); + return get(fieldName, params); } diff --git a/solr/core/src/java/org/apache/solr/highlight/SolrFragmentsBuilder.java b/solr/core/src/java/org/apache/solr/highlight/SolrFragmentsBuilder.java index 2e63067329d..2a22b00bf5b 100644 --- a/solr/core/src/java/org/apache/solr/highlight/SolrFragmentsBuilder.java +++ b/solr/core/src/java/org/apache/solr/highlight/SolrFragmentsBuilder.java @@ -20,7 +20,6 @@ package org.apache.solr.highlight; import org.apache.lucene.search.vectorhighlight.BoundaryScanner; import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.core.SolrInfoMBean; @@ -40,9 +39,8 @@ public abstract class SolrFragmentsBuilder extends HighlightingPluginBase */ public FragmentsBuilder getFragmentsBuilder(SolrParams params, BoundaryScanner bs) { numRequests++; - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } + params = SolrParams.wrapDefaults(params, defaults); + return getFragmentsBuilder( params, 
getPreTags( params, null ), getPostTags( params, null ), bs ); } @@ -55,9 +53,8 @@ public abstract class SolrFragmentsBuilder extends HighlightingPluginBase } private String[] getTags( SolrParams params, String paramName, String fieldName, String def ){ - if( defaults != null ) { - params = new DefaultSolrParams( params, defaults ); - } + params = SolrParams.wrapDefaults(params, defaults); + String value = null; if( fieldName == null ) value = params.get( paramName, def ); diff --git a/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java b/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java index e64fa18e77d..527cd363e3c 100644 --- a/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java @@ -23,7 +23,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.solr.schema.IndexSchema; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -81,7 +80,8 @@ public class DisMaxQParser extends QParser { @Override public Query parse() throws ParseException { - SolrParams solrParams = localParams == null ? params : new DefaultSolrParams(localParams, params); + SolrParams solrParams = SolrParams.wrapDefaults(localParams, params); + queryFields = SolrPluginUtils.parseFieldBoosts(solrParams.getParams(DisMaxParams.QF)); if (0 == queryFields.size()) { queryFields.put(req.getSchema().getDefaultSearchFieldName(), 1.0f); diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java index d89be8cf53c..4893554e760 100755 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java @@ -31,8 +31,6 @@ import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -42,8 +40,6 @@ import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.analysis.*; import java.util.*; -import java.io.Reader; -import java.io.IOException; /** * An advanced multi-field query parser. @@ -102,7 +98,7 @@ class ExtendedDismaxQParser extends QParser { SolrParams localParams = getLocalParams(); SolrParams params = getParams(); - SolrParams solrParams = localParams == null ? 
params : new DefaultSolrParams(localParams, params); + SolrParams solrParams = SolrParams.wrapDefaults(localParams, params); final String minShouldMatch = DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams); diff --git a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java index 8af5fd30931..70d9398d87e 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java +++ b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java @@ -25,10 +25,8 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.AppendedSolrParams; -import org.apache.solr.common.params.DefaultSolrParams; -import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.UpdateParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -81,15 +79,10 @@ public class SolrPluginUtils { SolrParams appends, SolrParams invariants) { SolrParams p = req.getParams(); - if (defaults != null) { - p = new DefaultSolrParams(p,defaults); - } - if (appends != null) { - p = new AppendedSolrParams(p,appends); - } - if (invariants != null) { - p = new DefaultSolrParams(invariants,p); - } + p = SolrParams.wrapDefaults(p, defaults); + p = SolrParams.wrapAppended(p, appends); + p = SolrParams.wrapDefaults(invariants, p); + req.setParams(p); } diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java index f0a754c818c..8992f345069 100644 --- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -34,9 +34,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LogMergePolicy; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.AppendedSolrParams; import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -448,7 +446,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { assertEquals(p.getInt("iii",5), 5); assertEquals(p.getFieldParam("field1","i"), "555"); - req.setParams(new DefaultSolrParams(p, new MapSolrParams(m))); + req.setParams(SolrParams.wrapDefaults(p, new MapSolrParams(m))); p = req.getParams(); assertEquals(req.getOriginalParams().get("s"), "bbb"); assertEquals(p.get("i"), "555"); @@ -470,7 +468,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { more.add("s", "ccc"); more.add("ss","YYY"); more.add("xx","XXX"); - p = new AppendedSolrParams(p, SolrParams.toSolrParams(more)); + p = SolrParams.wrapAppended(p, SolrParams.toSolrParams(more)); assertEquals(3, p.getParams("s").length); assertEquals("bbb", p.getParams("s")[0]); assertEquals("aaa", p.getParams("s")[1]); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java index 30d6ab5cf57..36c806744b7 100644 --- 
a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java @@ -47,7 +47,9 @@ import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.*; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.NamedList; import org.slf4j.Logger; @@ -266,16 +268,8 @@ public class CommonsHttpSolrServer extends SolrServer ModifiableSolrParams wparams = new ModifiableSolrParams(); wparams.set( CommonParams.WT, parser.getWriterType() ); wparams.set( CommonParams.VERSION, parser.getVersion()); - if( params == null ) { - params = wparams; - } - else { - params = new DefaultSolrParams( wparams, params ); - } - - if( _invariantParams != null ) { - params = new DefaultSolrParams( _invariantParams, params ); - } + params = SolrParams.wrapDefaults(wparams, params); + params = SolrParams.wrapDefaults(_invariantParams, params); int tries = _maxRetries + 1; try { diff --git a/solr/solrj/src/java/org/apache/solr/common/params/AppendedSolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/AppendedSolrParams.java index f0d5ed53308..a435b0dcbe5 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/AppendedSolrParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/AppendedSolrParams.java @@ -23,6 +23,11 @@ package org.apache.solr.common.params; * that all of the values are returned. */ public class AppendedSolrParams extends DefaultSolrParams { + + /** + * @deprecated (3.6) Use {@link SolrParams#wrapAppended(SolrParams, SolrParams)} instead. + */ + @Deprecated public AppendedSolrParams(SolrParams main, SolrParams extra) { super(main, extra); } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/DefaultSolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/DefaultSolrParams.java index edfad64bbe3..6ddc07fd98d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/DefaultSolrParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/DefaultSolrParams.java @@ -28,7 +28,12 @@ public class DefaultSolrParams extends SolrParams { protected final SolrParams params; protected final SolrParams defaults; + /** + * @deprecated (3.6) Use {@link SolrParams#wrapDefaults(SolrParams, SolrParams)} instead. 
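+   * <p>For example, {@code SolrParams.wrapDefaults(params, defaults)} simply returns
+   * {@code params} when {@code defaults} is null (and vice versa), so callers no longer
+   * need a null check before wrapping.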
+ */ + @Deprecated public DefaultSolrParams(SolrParams params, SolrParams defaults) { + assert params != null && defaults != null; this.params = params; this.defaults = defaults; } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java index ae7b291cf09..f8b9ea9c270 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java @@ -32,7 +32,7 @@ import org.apache.solr.common.util.StrUtils; * */ public abstract class SolrParams implements Serializable { - + /** returns the String value of a param, or null if not set */ public abstract String get(String param); @@ -249,6 +249,24 @@ public abstract class SolrParams implements Serializable { } } + @SuppressWarnings({"deprecation"}) + public static SolrParams wrapDefaults(SolrParams params, SolrParams defaults) { + if (params == null) + return defaults; + if (defaults == null) + return params; + return new DefaultSolrParams(params,defaults); + } + + @SuppressWarnings({"deprecation"}) + public static SolrParams wrapAppended(SolrParams params, SolrParams defaults) { + if (params == null) + return defaults; + if (defaults == null) + return params; + return new AppendedSolrParams(params,defaults); + } + /** Create a Map from a NamedList given no keys are repeated */ public static Map toMap(NamedList params) { HashMap map = new HashMap(); diff --git a/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java b/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java index b1b399c86d9..823fbbbbdca 100755 --- a/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java +++ b/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java @@ -22,9 +22,6 @@ import java.util.Map; import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.DefaultSolrParams; -import org.apache.solr.common.params.MapSolrParams; -import org.apache.solr.common.params.SolrParams; /** */ @@ -158,7 +155,7 @@ public class SolrParamTest extends LuceneTestCase dmap.put( "dint" , "123" ); // these are defined in params dmap.put( "int" , "456" ); - SolrParams defaults = new DefaultSolrParams( params, new MapSolrParams( dmap ) ); + SolrParams defaults = SolrParams.wrapDefaults(params, new MapSolrParams(dmap)); // in params, not in default assertEquals( pstr , defaults.get( "str" ) ); From 7c0069cbb727fe4a578d988ca22df3c18e24247d Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Tue, 13 Dec 2011 14:21:30 +0000 Subject: [PATCH 3/8] SOLR-1730: check the cause exception git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213704 13f79535-47bb-0310-9956-ffa450edef68 --- .../solr/handler/component/BadComponentTest.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java index 9471162ebf1..feffca0aa6a 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java @@ -33,9 +33,14 @@ public class BadComponentTest extends SolrTestCaseJ4{ System.setProperty("elevate.file", "foo.xml"); initCore("solrconfig-elevate.xml", "schema12.xml"); assertTrue(false); - } catch (Throwable e) { - log.error("Exception", e); - 
assertTrue(true); + } catch (RuntimeException e) { + //TODO: better way of checking this? + if (e.getCause() instanceof SolrException && e.getCause().getCause().getMessage().equals("Error initializing QueryElevationComponent.")){ + log.error("Exception", e); + assertTrue(true); + } else { + assertTrue(false); + } } finally { System.clearProperty("elevate.file"); } From d66ae611930b061dfd26f1944c92e60e2d07e3bf Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Tue, 13 Dec 2011 14:21:41 +0000 Subject: [PATCH 4/8] SOLR-1730: fix logging in exception git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213706 13f79535-47bb-0310-9956-ffa450edef68 --- solr/core/src/java/org/apache/solr/core/SolrCore.java | 1 - .../org/apache/solr/handler/component/BadComponentTest.java | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index b8d502504ab..812e8ef956d 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -613,7 +613,6 @@ public final class SolrCore implements SolrInfoMBean { resourceLoader.inform( resourceLoader ); resourceLoader.inform( this ); // last call before the latch is released. } catch (Throwable e) { - log.error("Error in constructing the core", e); latch.countDown();//release the latch, otherwise we block trying to do the close. This should be fine, since counting down on a latch of 0 is still fine //close down the searcher and any other resources, if it exists, as this is not recoverable close(); diff --git a/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java index feffca0aa6a..ae94d3cd6dc 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/BadComponentTest.java @@ -30,19 +30,21 @@ public class BadComponentTest extends SolrTestCaseJ4{ @Test public void testBadElevate() throws Exception { try { + ignoreException(".*constructing.*"); + ignoreException(".*QueryElevationComponent.*"); System.setProperty("elevate.file", "foo.xml"); initCore("solrconfig-elevate.xml", "schema12.xml"); assertTrue(false); } catch (RuntimeException e) { //TODO: better way of checking this? - if (e.getCause() instanceof SolrException && e.getCause().getCause().getMessage().equals("Error initializing QueryElevationComponent.")){ - log.error("Exception", e); + if (e.getCause() instanceof SolrException){ assertTrue(true); } else { assertTrue(false); } } finally { System.clearProperty("elevate.file"); + resetExceptionIgnores(); } } } From 0d86474d3c59396d27add2263b742e988ad9ddca Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Tue, 13 Dec 2011 16:32:24 +0000 Subject: [PATCH 5/8] LUCENE-3643: FilteredQuery and IndexSearcher.search(Query, Filter,...) 
now optimize the special case "query instanceof MatchAllDocsQuery" to execute as ConstantScoreQuery git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213771 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../apache/lucene/search/FilteredQuery.java | 63 +++++++++----- .../org/apache/lucene/search/QueryUtils.java | 4 +- .../lucene/search/TestFilteredQuery.java | 83 ++++++++++++++++++- 4 files changed, 125 insertions(+), 29 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ee5f23aa6be..5c8727fcbea 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -638,6 +638,10 @@ Optimizations boolean clauses are required and instances of TermQuery. (Simon Willnauer, Robert Muir) +* LUCENE-3643: FilteredQuery and IndexSearcher.search(Query, Filter,...) + now optimize the special case query instanceof MatchAllDocsQuery to + execute as ConstantScoreQuery. (Uwe Schindler) + Bug fixes * LUCENE-2803: The FieldCache can miss values if an entry for a reader diff --git a/lucene/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/src/java/org/apache/lucene/search/FilteredQuery.java index fc3a7a78378..87624d04942 100644 --- a/lucene/src/java/org/apache/lucene/search/FilteredQuery.java +++ b/lucene/src/java/org/apache/lucene/search/FilteredQuery.java @@ -33,25 +33,23 @@ import java.util.Set; *
<p>Note: the bits are retrieved from the filter each time this * query is used in a search - use a CachingWrapperFilter to avoid * regenerating the bits every time. - * - * <p>
Created: Apr 20, 2004 8:58:29 AM - * * @since 1.4 * @see CachingWrapperFilter */ -public class FilteredQuery -extends Query { +public class FilteredQuery extends Query { - Query query; - Filter filter; + private final Query query; + private final Filter filter; /** * Constructs a new query which applies a filter to the results of the original query. - * Filter.getDocIdSet() will be called every time this query is used in a search. + * {@link Filter#getDocIdSet} will be called every time this query is used in a search. * @param query Query to be filtered, cannot be null. * @param filter Filter to apply to query results, cannot be null. */ public FilteredQuery (Query query, Filter filter) { + if (query == null || filter == null) + throw new IllegalArgumentException("Query and filter cannot be null."); this.query = query; this.filter = filter; } @@ -229,31 +227,45 @@ extends Query { }; } - /** Rewrites the wrapped query. */ + /** Rewrites the query. If the wrapped is an instance of + * {@link MatchAllDocsQuery} it returns a {@link ConstantScoreQuery}. Otherwise + * it returns a new {@code FilteredQuery} wrapping the rewritten query. */ @Override public Query rewrite(IndexReader reader) throws IOException { - Query rewritten = query.rewrite(reader); - if (rewritten != query) { - FilteredQuery clone = (FilteredQuery)this.clone(); - clone.query = rewritten; - return clone; + final Query queryRewritten = query.rewrite(reader); + + if (queryRewritten instanceof MatchAllDocsQuery) { + // Special case: If the query is a MatchAllDocsQuery, we only + // return a CSQ(filter). + final Query rewritten = new ConstantScoreQuery(filter); + // Combine boost of MatchAllDocsQuery and the wrapped rewritten query: + rewritten.setBoost(this.getBoost() * queryRewritten.getBoost()); + return rewritten; + } + + if (queryRewritten != query) { + // rewrite to a new FilteredQuery wrapping the rewritten query + final Query rewritten = new FilteredQuery(queryRewritten, filter); + rewritten.setBoost(this.getBoost()); + return rewritten; } else { + // nothing to rewrite, we are done! return this; } } - public Query getQuery() { + public final Query getQuery() { return query; } - public Filter getFilter() { + public final Filter getFilter() { return filter; } // inherit javadoc @Override public void extractTerms(Set terms) { - getQuery().extractTerms(terms); + getQuery().extractTerms(terms); } /** Prints a user-readable version of this query. */ @@ -271,16 +283,21 @@ extends Query { /** Returns true iff o is equal to this. */ @Override public boolean equals(Object o) { - if (o instanceof FilteredQuery) { - FilteredQuery fq = (FilteredQuery) o; - return (query.equals(fq.query) && filter.equals(fq.filter) && getBoost()==fq.getBoost()); - } - return false; + if (o == this) + return true; + if (!super.equals(o)) + return false; + assert o instanceof FilteredQuery; + final FilteredQuery fq = (FilteredQuery) o; + return fq.query.equals(this.query) && fq.filter.equals(this.filter); } /** Returns a hash code value for this object. 
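   * It combines {@code super.hashCode()}, which already covers the boost, with the
   * hashes of the wrapped query and filter, mirroring {@link #equals}.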
*/ @Override public int hashCode() { - return query.hashCode() ^ filter.hashCode() + Float.floatToRawIntBits(getBoost()); + int hash = super.hashCode(); + hash = hash * 31 + query.hashCode(); + hash = hash * 31 + filter.hashCode(); + return hash; } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/search/QueryUtils.java b/lucene/src/test-framework/java/org/apache/lucene/search/QueryUtils.java index 5c489f29888..187700cd6f9 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/src/test-framework/java/org/apache/lucene/search/QueryUtils.java @@ -79,8 +79,8 @@ public class QueryUtils { } public static void checkUnequal(Query q1, Query q2) { - Assert.assertTrue(!q1.equals(q2)); - Assert.assertTrue(!q2.equals(q1)); + Assert.assertFalse(q1 + " equal to " + q2, q1.equals(q2)); + Assert.assertFalse(q2 + " equal to " + q1, q2.equals(q1)); // possible this test can fail on a hash collision... if that // happens, please change test to use a different example. diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java index a8ef3f3f41d..004a9601346 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java @@ -132,6 +132,11 @@ public class TestFilteredQuery extends LuceneTestCase { assertEquals (2, hits.length); QueryUtils.check(random, filteredquery,searcher); + filteredquery = new FilteredQueryRA(new MatchAllDocsQuery(), filter, useRandomAccess); + hits = searcher.search (filteredquery, null, 1000).scoreDocs; + assertEquals (2, hits.length); + QueryUtils.check(random, filteredquery,searcher); + filteredquery = new FilteredQueryRA(new TermQuery (new Term ("field", "x")), filter, useRandomAccess); hits = searcher.search (filteredquery, null, 1000).scoreDocs; assertEquals (1, hits.length); @@ -220,9 +225,9 @@ public class TestFilteredQuery extends LuceneTestCase { private void tBooleanMUST(final boolean useRandomAccess) throws Exception { BooleanQuery bq = new BooleanQuery(); - Query query = new FilteredQueryRA(new MatchAllDocsQuery(), new SingleDocTestFilter(0), useRandomAccess); + Query query = new FilteredQueryRA(new TermQuery(new Term("field", "one")), new SingleDocTestFilter(0), useRandomAccess); bq.add(query, BooleanClause.Occur.MUST); - query = new FilteredQueryRA(new MatchAllDocsQuery(), new SingleDocTestFilter(1), useRandomAccess); + query = new FilteredQueryRA(new TermQuery(new Term("field", "one")), new SingleDocTestFilter(1), useRandomAccess); bq.add(query, BooleanClause.Occur.MUST); ScoreDoc[] hits = searcher.search(bq, null, 1000).scoreDocs; assertEquals(0, hits.length); @@ -238,9 +243,9 @@ public class TestFilteredQuery extends LuceneTestCase { private void tBooleanSHOULD(final boolean useRandomAccess) throws Exception { BooleanQuery bq = new BooleanQuery(); - Query query = new FilteredQueryRA(new MatchAllDocsQuery(), new SingleDocTestFilter(0), useRandomAccess); + Query query = new FilteredQueryRA(new TermQuery(new Term("field", "one")), new SingleDocTestFilter(0), useRandomAccess); bq.add(query, BooleanClause.Occur.SHOULD); - query = new FilteredQueryRA(new MatchAllDocsQuery(), new SingleDocTestFilter(1), useRandomAccess); + query = new FilteredQueryRA(new TermQuery(new Term("field", "one")), new SingleDocTestFilter(1), useRandomAccess); bq.add(query, BooleanClause.Occur.SHOULD); ScoreDoc[] hits = searcher.search(bq, null, 1000).scoreDocs; assertEquals(2, 
 hits.length);
@@ -288,6 +293,76 @@ public class TestFilteredQuery extends LuceneTestCase {
     assertEquals(1, hits.length);
     QueryUtils.check(random, query, searcher);
   }
+
+  public void testEqualsHashcode() throws Exception {
+    // sanity checks first: make sure the queries and filters used here work on their own:
+    assertEquals(new PrefixFilter(new Term("field", "o")), new PrefixFilter(new Term("field", "o")));
+    assertFalse(new PrefixFilter(new Term("field", "a")).equals(new PrefixFilter(new Term("field", "o"))));
+    QueryUtils.checkHashEquals(new TermQuery(new Term("field", "one")));
+    QueryUtils.checkUnequal(
+      new TermQuery(new Term("field", "one")), new TermQuery(new Term("field", "two"))
+    );
+    // now test FilteredQuery equals/hashcode:
+    QueryUtils.checkHashEquals(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o"))));
+    QueryUtils.checkUnequal(
+      new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o"))),
+      new FilteredQuery(new TermQuery(new Term("field", "two")), new PrefixFilter(new Term("field", "o")))
+    );
+    QueryUtils.checkUnequal(
+      new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "a"))),
+      new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")))
+    );
+  }
+
+  public void testInvalidArguments() throws Exception {
+    try {
+      new FilteredQuery(null, null);
+      fail("Should throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+    try {
+      new FilteredQuery(new TermQuery(new Term("field", "one")), null);
+      fail("Should throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+    try {
+      new FilteredQuery(null, new PrefixFilter(new Term("field", "o")));
+      fail("Should throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+  }
+
+  private void assertRewrite(FilteredQuery fq, Class<? extends Query> clazz) throws Exception {
+    // assign crazy boost to FQ
+    final float boost = random.nextFloat() * 100.f;
+    fq.setBoost(boost);
+
+    // assign crazy boost to inner
+    final float innerBoost = random.nextFloat() * 100.f;
+    fq.getQuery().setBoost(innerBoost);
+
+    // check the class and boosts of rewritten query
+    final Query rewritten = searcher.rewrite(fq);
+    assertTrue("is not instance of " + clazz.getName(), clazz.isInstance(rewritten));
+    if (rewritten instanceof FilteredQuery) {
+      assertEquals(boost, rewritten.getBoost(), 1.E-5f);
+      assertEquals(innerBoost, ((FilteredQuery) rewritten).getQuery().getBoost(), 1.E-5f);
+    } else {
+      assertEquals(boost * innerBoost, rewritten.getBoost(), 1.E-5f);
+    }
+
+    // check that the original query was not modified
+    assertEquals(boost, fq.getBoost(), 1.E-5f);
+    assertEquals(innerBoost, fq.getQuery().getBoost(), 1.E-5f);
+  }
+
+  public void testRewrite() throws Exception {
+    assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o"))), FilteredQuery.class);
+    assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o"))), ConstantScoreQuery.class);
+  }
 
   public static final class FilteredQueryRA extends FilteredQuery {
     private final boolean useRandomAccess;
From d6a96a43d9891ffbd4210d41b5e8a50a55bf7916 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Tue, 13 Dec 2011 17:34:58 +0000
Subject: [PATCH 6/8] LUCENE-3586: allow specifying -dir-impl FSDirectory
 subclass to CheckIndex, IndexUpgrader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213800 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                            |   4 +
 .../org/apache/lucene/index/CheckIndex.java   |  60 ++++++----
 .../org/apache/lucene/index/IndexReader.java  |  27 ++++-
 .../apache/lucene/index/IndexUpgrader.java    |  37 ++++--
 .../apache/lucene/util/CommandLineUtil.java   | 112 ++++++++++++++++++
 .../apache/lucene/util/LuceneTestCase.java    |  28 ++---
 6 files changed, 212 insertions(+), 56 deletions(-)
 create mode 100644 lucene/src/java/org/apache/lucene/util/CommandLineUtil.java

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5c8727fcbea..a952c18a243 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -716,6 +716,10 @@ New Features
 * LUCENE-3593: Added a FieldValueFilter that accepts all documents that either
   have at least one or no value at all in a specific field. (Simon Willnauer,
   Uwe Schindler, Robert Muir)
+
+* LUCENE-3586: CheckIndex and IndexUpgrader allow you to specify the
+  specific FSDirectory implementation to use (with the new -dir-impl
+  command-line option). (Luca Cavanna via Mike McCandless)
 
 Bug fixes
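The new option works the same way for both tools; unqualified class names are resolved against org.apache.lucene.store, as the usage messages in the diffs below describe. For example (index paths illustrative):

    java org.apache.lucene.index.CheckIndex /path/to/index -dir-impl MMapDirectory
    java org.apache.lucene.index.IndexUpgrader -delete-prior-commits -dir-impl NIOFSDirectory /path/to/index

diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java
index 462b78cf116..eda9b7c4536 100644
--- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java
@@ -17,15 +17,6 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import org.apache.lucene.document.FieldType; // for javadocs
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.codecs.Codec;
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -37,6 +28,16 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.document.FieldType; // for javadocs
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.codecs.Codec;
+
 import org.apache.lucene.index.codecs.BlockTreeTermsReader;
 import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.values.IndexDocValues;
@@ -44,6 +45,7 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CommandLineUtil;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.StringHelper;
 
@@ -1408,41 +1410,48 @@ public class CheckIndex {
     boolean verbose = false;
     List<String> onlySegments = new ArrayList<String>();
     String indexPath = null;
+    String dirImpl = null;
     int i = 0;
     while(i < args.length) {
-      if (args[i].equals("-fix")) {
+      String arg = args[i];
+      if ("-fix".equals(arg)) {
         doFix = true;
-        i++;
-      } else if (args[i].equals("-codec")) {
+      } else if ("-codec".equals(arg)) {
         if (i == args.length-1) {
           System.out.println("ERROR: missing name for -codec option");
           System.exit(1);
         }
-        codec = Codec.forName(args[i+1]);
-        i+=2;
-      } else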
-      } else if (args[i].equals("-verbose")) {
-        verbose = true;
-        i++;
-      } else if (args[i].equals("-segment")) {
+        i++;
+        codec = Codec.forName(args[i]);
+      } else if (arg.equals("-verbose")) {
+        verbose = true;
+      } else if (arg.equals("-segment")) {
         if (i == args.length-1) {
           System.out.println("ERROR: missing name for -segment option");
           System.exit(1);
         }
-        onlySegments.add(args[i+1]);
-        i += 2;
+        i++;
+        onlySegments.add(args[i]);
+      } else if ("-dir-impl".equals(arg)) {
+        if (i == args.length - 1) {
+          System.out.println("ERROR: missing value for -dir-impl option");
+          System.exit(1);
+        }
+        i++;
+        dirImpl = args[i];
       } else {
         if (indexPath != null) {
           System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
           System.exit(1);
         }
         indexPath = args[i];
-        i++;
       }
+      i++;
     }
 
     if (indexPath == null) {
       System.out.println("\nERROR: index path not specified");
-      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
+      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] [-dir-impl X]\n" +
                          "\n" +
                          "  -fix: actually write a new segments_N file, removing any problematic segments\n" +
                          "  -codec X: when fixing, codec to write the new segments_N file with\n" +
@@ -1450,7 +1459,8 @@
                          "  -segment X: only check the specified segments.  This can be specified multiple\n" +
                          "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
                          "              You can't use this with the -fix option\n" +
-                         "\n" +
+                         "  -dir-impl X: use a specific " + FSDirectory.class.getSimpleName() + " implementation. " +
+                         "If no package is specified the " + FSDirectory.class.getPackage().getName() + " package will be used.\n" +
                          "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
                          "documents (perhaps many) to be permanently removed from the index. Always make\n" +
                          "a backup copy of your index before running this!
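 Do not run this tool on an index\n" +
@@ -1480,7 +1490,11 @@
     System.out.println("\nOpening index @ " + indexPath + "\n");
     Directory dir = null;
     try {
-      dir = FSDirectory.open(new File(indexPath));
+      if (dirImpl == null) {
+        dir = FSDirectory.open(new File(indexPath));
+      } else {
+        dir = CommandLineUtil.newFSDirectory(dirImpl, new File(indexPath));
+      }
     } catch (Throwable t) {
       System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
       t.printStackTrace(System.out);

Programmatic callers do not need the command line at all; the same effect comes from constructing the Directory directly. A sketch under the patch's API (class name, the "MMapDirectory" choice, and the args handling are illustrative):

    import java.io.File;

    import org.apache.lucene.index.CheckIndex;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.CommandLineUtil;

    public class CheckIndexSketch {
      public static void main(String[] args) throws Exception {
        // programmatic equivalent of `-dir-impl MMapDirectory` on the command line:
        FSDirectory dir = CommandLineUtil.newFSDirectory("MMapDirectory", new File(args[0]));
        try {
          CheckIndex checker = new CheckIndex(dir);
          CheckIndex.Status status = checker.checkIndex();
          System.out.println(status.clean ? "OK" : "BROKEN");
        } finally {
          dir.close();
        }
      }
    }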
diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java
index 15f0f2562f6..0ae804d766f 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@@ -36,6 +36,7 @@ import org.apache.lucene.store.*;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CommandLineUtil;
 import org.apache.lucene.util.ReaderUtil;         // for javadocs
 
 /** IndexReader is an abstract class, providing an interface for accessing an
@@ -982,17 +983,28 @@ public abstract class IndexReader implements Cloneable,Closeable {
   public static void main(String [] args) {
     String filename = null;
     boolean extract = false;
+    String dirImpl = null;
 
-    for (int i = 0; i < args.length; ++i) {
-      if (args[i].equals("-extract")) {
+    int j = 0;
+    while(j < args.length) {
+      String arg = args[j];
+      if ("-extract".equals(arg)) {
         extract = true;
+      } else if ("-dir-impl".equals(arg)) {
+        if (j == args.length - 1) {
+          System.out.println("ERROR: missing value for -dir-impl option");
+          System.exit(1);
+        }
+        j++;
+        dirImpl = args[j];
       } else if (filename == null) {
-        filename = args[i];
+        filename = arg;
       }
+      j++;
     }
 
     if (filename == null) {
-      System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
+      System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] [-dir-impl X] <cfsfile>");
       return;
     }
 
@@ -1004,7 +1016,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
       File file = new File(filename);
       String dirname = file.getAbsoluteFile().getParent();
       filename = file.getName();
-      dir = FSDirectory.open(new File(dirname));
+      if (dirImpl == null) {
+        dir = FSDirectory.open(new File(dirname));
+      } else {
+        dir = CommandLineUtil.newFSDirectory(dirImpl, new File(dirname));
+      }
+
       cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);
 
       String [] files = cfr.listAll();
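For example, to list (or, with -extract, unpack) the contents of a compound file while forcing a particular directory implementation (path and segment name illustrative):

    java org.apache.lucene.index.IndexReader -dir-impl SimpleFSDirectory /path/to/index/_0.cfs
    java org.apache.lucene.index.IndexReader -extract -dir-impl SimpleFSDirectory /path/to/index/_0.cfs

diff --git a/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java b/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
index 396ec5cf178..227d82f6c8e 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.CommandLineUtil;
 import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.Version;
@@ -54,36 +55,56 @@ public final class IndexUpgrader {
   private static void printUsage() {
     System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten.");
     System.err.println("Usage:");
-    System.err.println("  java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir");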
+    System.err.println("  java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] [-dir-impl X] indexDir");
     System.err.println("This tool keeps only the last commit in an index; for this");
     System.err.println("reason, if the incoming index has more than one commit, the tool");
     System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
     System.err.println("this, allowing the tool to delete all but the last commit.");
+    System.err.println("Specify a " + FSDirectory.class.getSimpleName() +
+        " implementation through the -dir-impl option to force its use. If no package is specified the " +
+        FSDirectory.class.getPackage().getName() + " package will be used.");
     System.err.println("WARNING: This tool may reorder document IDs!");
     System.exit(1);
   }
 
   @SuppressWarnings("deprecation")
   public static void main(String[] args) throws IOException {
-    String dir = null;
+    String path = null;
     boolean deletePriorCommits = false;
     PrintStream out = null;
-    for (String arg : args) {
+    String dirImpl = null;
+    int i = 0;
+    while (i clazz = loadFSDirectoryClass(clazzName);
+      return newFSDirectory(clazz, file);
+    } catch (ClassNotFoundException e) {
+      throw new IllegalArgumentException(FSDirectory.class.getSimpleName() +
+          " implementation not found: " + clazzName, e);
+    } catch (ClassCastException e) {
+      throw new IllegalArgumentException(clazzName + " is not a " + FSDirectory.class.getSimpleName() +
+          " implementation", e);
+    } catch (NoSuchMethodException e) {
+      throw new IllegalArgumentException(clazzName + " constructor with " +
+          File.class.getSimpleName() + " as parameter not found", e);
+    } catch (Exception e) {
+      throw new IllegalArgumentException("Error creating " + clazzName + " instance", e);
+    }
+  }
+
+  /**
+   * Loads a specific Directory implementation
+   * @param className The name of the Directory class to load
+   * @return The Directory class loaded
+   * @throws ClassNotFoundException
+   */
+  public static Class<? extends Directory> loadDirectoryClass(String clazzName)
+      throws ClassNotFoundException {
+    return Class.forName(adjustDirectoryClassName(clazzName)).asSubclass(Directory.class);
+  }
+
+  /**
+   * Loads a specific FSDirectory implementation
+   * @param className The name of the FSDirectory class to load
+   * @return The FSDirectory class loaded
+   * @throws ClassNotFoundException
+   */
+  public static Class<? extends FSDirectory> loadFSDirectoryClass(String clazzName)
+      throws ClassNotFoundException {
+    return Class.forName(adjustDirectoryClassName(clazzName)).asSubclass(FSDirectory.class);
+  }
+
+  private static String adjustDirectoryClassName(String clazzName) {
+    if (clazzName == null || clazzName.trim().length() == 0) {
+      throw new IllegalArgumentException("The " + FSDirectory.class.getSimpleName() +
+          " implementation cannot be null or empty");
+    }
+
+    if (clazzName.indexOf(".") == -1) {// if not fully qualified, assume .store
+      clazzName = Directory.class.getPackage().getName() + "." + clazzName;
+    }
+    return clazzName;
+  }
+
+  /**
+   * Creates a new specific FSDirectory instance
+   * @param clazz The class of the object to be created
+   * @param file The file to be used as parameter constructor
+   * @return The new FSDirectory instance
+   * @throws NoSuchMethodException
+   * @throws InstantiationException
+   * @throws IllegalAccessException
+   * @throws InvocationTargetException
+   */
+  public static FSDirectory newFSDirectory(Class<? extends FSDirectory> clazz, File file)
+      throws NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
+    // Assuming every FSDirectory has a ctor(File):
+    Constructor<? extends FSDirectory> ctor = clazz.getConstructor(File.class);
+    return ctor.newInstance(file);
+  }
+
+}
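Tying the helper methods together (resolving a short name against org.apache.lucene.store, loading the class, and invoking its File constructor) looks roughly like this; a sketch with illustrative paths, where failures surface as the IllegalArgumentExceptions above:

    import java.io.File;

    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.CommandLineUtil;

    public class CommandLineUtilSketch {
      public static void main(String[] args) throws Exception {
        // short name: resolved against org.apache.lucene.store
        FSDirectory d1 = CommandLineUtil.newFSDirectory("NIOFSDirectory", new File("/tmp/ix1"));

        // fully qualified names are used as-is
        Class<? extends FSDirectory> clazz =
            CommandLineUtil.loadFSDirectoryClass("org.apache.lucene.store.SimpleFSDirectory");
        FSDirectory d2 = CommandLineUtil.newFSDirectory(clazz, new File("/tmp/ix2"));

        d1.close();
        d2.close();
      }
    }

diff --git a/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
index 9e093847e0c..56a40e568a6 100644
--- a/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
@@ -26,7 +26,6 @@ import java.lang.annotation.Inherited;
 import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
-import java.lang.reflect.Constructor;
 import java.util.*;
 import java.util.Map.Entry;
 import java.util.concurrent.ExecutorService;
@@ -1035,24 +1034,16 @@ public abstract class LuceneTestCase extends Assert {
       fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
     }
 
-    if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
-      fsdirClass = "org.apache.lucene.store." + fsdirClass;
-    }
-
     Class<? extends FSDirectory> clazz;
     try {
       try {
-        clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class);
+        clazz = CommandLineUtil.loadFSDirectoryClass(fsdirClass);
       } catch (ClassCastException e) {
         // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random
         fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
-
-        if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
-          fsdirClass = "org.apache.lucene.store." + fsdirClass;
-        }
-
-        clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class);
+        clazz = CommandLineUtil.loadFSDirectoryClass(fsdirClass);
       }
+
       MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f));
       if (lf != null) {
         dir.setLockFactory(lf);
@@ -1165,10 +1156,7 @@ public abstract class LuceneTestCase extends Assert {
     throws IOException {
     FSDirectory d = null;
     try {
-      // Assuming every FSDirectory has a ctor(File), but not all may take a
-      // LockFactory too, so setting it afterwards.
-      Constructor<? extends FSDirectory> ctor = clazz.getConstructor(File.class);
-      d = ctor.newInstance(file);
+      d = CommandLineUtil.newFSDirectory(clazz, file);
     } catch (Exception e) {
       d = FSDirectory.open(file);
     }
@@ -1186,12 +1174,12 @@ public abstract class LuceneTestCase extends Assert {
   }
 
   static Directory newDirectoryImpl(Random random, String clazzName) {
-    if (clazzName.equals("random"))
+    if (clazzName.equals("random")) {
       clazzName = randomDirectory(random);
-    if (clazzName.indexOf(".") == -1) // if not fully qualified, assume .store
-      clazzName = "org.apache.lucene.store." + clazzName;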
+    }
+
     try {
-      final Class<? extends Directory> clazz = Class.forName(clazzName).asSubclass(Directory.class);
+      final Class<? extends Directory> clazz = CommandLineUtil.loadDirectoryClass(clazzName);
       // If it is a FSDirectory type, try its ctor(File)
       if (FSDirectory.class.isAssignableFrom(clazz)) {
         final File dir = _TestUtil.getTempDir("index");

From 6d706399020a42fcf4005e1384ac8f67dbb7e781 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Tue, 13 Dec 2011 17:36:14 +0000
Subject: [PATCH 7/8] LUCENE-3586: fixup javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213803 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/src/java/org/apache/lucene/util/CommandLineUtil.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/util/CommandLineUtil.java b/lucene/src/java/org/apache/lucene/util/CommandLineUtil.java
index bd6e7b891b6..4f1c38c4787 100644
--- a/lucene/src/java/org/apache/lucene/util/CommandLineUtil.java
+++ b/lucene/src/java/org/apache/lucene/util/CommandLineUtil.java
@@ -36,7 +36,7 @@ public final class CommandLineUtil {
 
   /**
    * Creates a specific FSDirectory instance starting from its class name
-   * @param className The name of the FSDirectory class to load
+   * @param clazzName The name of the FSDirectory class to load
    * @param file The file to be used as parameter constructor
    * @return the new FSDirectory instance
    */
@@ -60,7 +60,7 @@ public final class CommandLineUtil {
 
   /**
    * Loads a specific Directory implementation
-   * @param className The name of the Directory class to load
+   * @param clazzName The name of the Directory class to load
   * @return The Directory class loaded
    * @throws ClassNotFoundException
    */
@@ -71,7 +71,7 @@ public final class CommandLineUtil {
 
   /**
    * Loads a specific FSDirectory implementation
-   * @param className The name of the FSDirectory class to load
+   * @param clazzName The name of the FSDirectory class to load
    * @return The FSDirectory class loaded
    * @throws ClassNotFoundException
    */

From 0f8fe10c7d2792f8ab3039b8893b32abf58fd831 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Tue, 13 Dec 2011 18:01:49 +0000
Subject: [PATCH 8/8] LUCENE-3531: add boolean (default false) to
 CachingWrapperFilter to control whether it should recache on new deletes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1213811 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/search/CachingWrapperFilter.java   | 70 +++++++++++------
 .../search/CachingWrapperFilterHelper.java    | 75 -------------------
 .../search/TestCachingWrapperFilter.java      | 75 ++++++++++++++++---
 3 files changed, 111 insertions(+), 109 deletions(-)
 delete mode 100644 lucene/src/test-framework/java/org/apache/lucene/search/CachingWrapperFilterHelper.java

diff --git a/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java b/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
index 4de6e2691da..68d36b69d0c 100644
--- a/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
@@ -38,28 +38,27 @@ public class CachingWrapperFilter extends Filter {
   Filter filter;
 
   protected final FilterCache<DocIdSet> cache;
+  private final boolean recacheDeletes;
 
-  static class FilterCache<T> {
+  private static class FilterCache<T> {
 
     /**
     * A transient Filter cache (package private because of test)
     */
-    // NOTE: not final so that we can dynamically re-init
-    // after de-serialize
-    transient Map<Object,T> cache;
+    private final Map<Object,Map<Object,T>> cache = new WeakHashMap<Object,Map<Object,T>>();
 
-    public synchronized T
-    public synchronized T get(IndexReader reader, Object coreKey) throws IOException {
-      T value;
-
-      if (cache == null) {
-        cache = new WeakHashMap<Object,T>();
-      }
-
-      return cache.get(coreKey);
+    public synchronized T get(IndexReader reader, Object coreKey, Object coreSubKey) throws IOException {
+      Map<Object,T> innerCache = cache.get(coreKey);
+      if (innerCache == null) {
+        innerCache = new WeakHashMap<Object,T>();
+        cache.put(coreKey, innerCache);
+      }
+
+      return innerCache.get(coreSubKey);
     }
 
-    public synchronized void put(Object coreKey, T value) {
-      cache.put(coreKey, value);
+    public synchronized void put(Object coreKey, Object coreSubKey, T value) {
+      cache.get(coreKey).put(coreSubKey, value);
     }
   }
 
@@ -67,7 +66,19 @@ public class CachingWrapperFilter extends Filter {
    * @param filter Filter to cache results of
    */
   public CachingWrapperFilter(Filter filter) {
+    this(filter, false);
+  }
+
+  /** Wraps another filter's result and caches it. If
+   * recacheDeletes is true, then new deletes (for example
+   * after {@link IndexReader#openIfChanged}) will be AND'd
+   * and cached again.
+   *
+   * @param filter Filter to cache results of
+   */
+  public CachingWrapperFilter(Filter filter, boolean recacheDeletes) {
     this.filter = filter;
+    this.recacheDeletes = recacheDeletes;
     cache = new FilterCache<DocIdSet>();
   }
 
@@ -106,33 +117,48 @@ public class CachingWrapperFilter extends Filter {
     final IndexReader reader = context.reader;
     final Object coreKey = reader.getCoreCacheKey();
 
-    DocIdSet docIdSet = cache.get(reader, coreKey);
+    // Only cache if incoming acceptDocs is == live docs;
+    // if Lucene passes in more interesting acceptDocs in
+    // the future we don't want to over-cache:
+    final boolean doCacheSubAcceptDocs = recacheDeletes && acceptDocs == reader.getLiveDocs();
+
+    final Bits subAcceptDocs;
+    if (doCacheSubAcceptDocs) {
+      subAcceptDocs = acceptDocs;
+    } else {
+      subAcceptDocs = null;
+    }
+
+    DocIdSet docIdSet = cache.get(reader, coreKey, subAcceptDocs);
     if (docIdSet != null) {
       hitCount++;
     } else {
       missCount++;
-      // cache miss: we use no acceptDocs here
-      // (this saves time on building DocIdSet, the acceptDocs will be applied on the cached set)
-      docIdSet = docIdSetToCache(filter.getDocIdSet(context, null/**!!!*/), reader);
-      cache.put(coreKey, docIdSet);
+      docIdSet = docIdSetToCache(filter.getDocIdSet(context, subAcceptDocs), reader);
+      cache.put(coreKey, subAcceptDocs, docIdSet);
+    }
+
+    if (doCacheSubAcceptDocs) {
+      return docIdSet;
+    } else {
+      return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
     }
-
-    return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
   }
 
   @Override
   public String toString() {
-    return "CachingWrapperFilter("+filter+")";
+    return "CachingWrapperFilter("+filter+",recacheDeletes=" + recacheDeletes + ")";
   }
 
   @Override
   public boolean equals(Object o) {
     if (!(o instanceof CachingWrapperFilter)) return false;
-    return this.filter.equals(((CachingWrapperFilter)o).filter);
+    final CachingWrapperFilter other = (CachingWrapperFilter) o;
+    return this.filter.equals(other.filter) && this.recacheDeletes == other.recacheDeletes;
   }
 
   @Override
   public int hashCode() {
-    return filter.hashCode() ^ 0x1117BF25;
+    return (filter.hashCode() ^ 0x1117BF25) + (recacheDeletes ? 0 : 1);
   }
 }
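The new flag trades cache-hit rate against per-hit filtering cost: with the default (false) the DocIdSet is cached once per segment core and current deletes are applied on the fly at search time, while with true a change in deletes causes the live docs to be AND'd in and the result cached again. A sketch of the two configurations (the wrapped TermQuery filter is illustrative only):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.*;

    public class RecacheDeletesSketch {
      // default: cache once per segment core; deletes applied on the fly at search time
      public static Filter dynamicDeletes() {
        return new CachingWrapperFilter(
            new QueryWrapperFilter(new TermQuery(new Term("id", "1"))));
      }

      // recacheDeletes=true: when deletes change, live docs are AND'd into the
      // bit set and the result is cached again, so later searches skip the check
      public static Filter recacheOnDeletes() {
        return new CachingWrapperFilter(
            new QueryWrapperFilter(new TermQuery(new Term("id", "1"))), true);
      }
    }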
diff --git a/lucene/src/test-framework/java/org/apache/lucene/search/CachingWrapperFilterHelper.java b/lucene/src/test-framework/java/org/apache/lucene/search/CachingWrapperFilterHelper.java
deleted file mode 100644
index 105e72bf9cd..00000000000
--- a/lucene/src/test-framework/java/org/apache/lucene/search/CachingWrapperFilterHelper.java
+++ /dev/null
@@ -1,75 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import junit.framework.Assert;
-
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.util.Bits;
-
-/**
- * A unit test helper class to test when the filter is getting cached and when it is not.
- */
-public class CachingWrapperFilterHelper extends CachingWrapperFilter {
-
-  private boolean shouldHaveCache = false;
-
-  /**
-   * @param filter Filter to cache results of
-   */
-  public CachingWrapperFilterHelper(Filter filter) {
-    super(filter);
-  }
-
-  public void setShouldHaveCache(boolean shouldHaveCache) {
-    this.shouldHaveCache = shouldHaveCache;
-  }
-
-  @Override
-  public synchronized DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-
-    final int saveMissCount = missCount;
-    DocIdSet docIdSet = super.getDocIdSet(context, acceptDocs);
-
-    if (shouldHaveCache) {
-      Assert.assertEquals("Cache should have data ", saveMissCount, missCount);
-    } else {
-      Assert.assertTrue("Cache should be null " + docIdSet, missCount > saveMissCount);
-    }
-
-    return docIdSet;
-  }
-
-  @Override
-  public String toString() {
-    return "CachingWrapperFilterHelper("+filter+")";
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof CachingWrapperFilterHelper)) return false;
-    return this.filter.equals(o);
-  }
-
-  @Override
-  public int hashCode() {
-    return this.filter.hashCode() ^ 0x5525aacb;
-  }
-}
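With the helper class gone, tests observe caching directly through CachingWrapperFilter's package-private hitCount/missCount counters, as the updated test below does. The pattern in isolation (a fragment; searcher and filter set up elsewhere, single-segment index and same-package access to missCount assumed):

    // filter is a CachingWrapperFilter, searcher an IndexSearcher over an unchanged reader
    int before = filter.missCount;
    searcher.search(new MatchAllDocsQuery(), filter, 1);
    assert filter.missCount == before + 1; // first use of this segment core: cache miss
    searcher.search(new MatchAllDocsQuery(), filter, 1);
    assert filter.missCount == before + 1; // same reader again: cache hit

diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
index d46783594e2..68161a5df37 100644
--- a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
+++ b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
@@ -30,8 +30,9 @@ import org.apache.lucene.index.SlowMultiReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 
 public class TestCachingWrapperFilter extends LuceneTestCase {
 
@@ -164,6 +165,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
         // asserts below requires no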
        // unexpected merges:
        setMergePolicy(newLogMergePolicy(10))
     );
+    _TestUtil.keepFullyDeletedSegments(writer.w);
 
     // NOTE: cannot use writer.getReader because RIW (on
     // flipping a coin) may give us a newly opened reader,
@@ -173,7 +175,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
     // same reason we don't wrap?
     IndexSearcher searcher = newSearcher(reader, false);
 
-    // add a doc, refresh the reader, and check that its there
+    // add a doc, refresh the reader, and check that it's there
     Document doc = new Document();
     doc.add(newField("id", "1", StringField.TYPE_STORED));
     writer.addDocument(doc);
@@ -186,23 +188,76 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
 
     final Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));
 
-    CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);
+    // force cache to regenerate after deletions:
+    CachingWrapperFilter filter = new CachingWrapperFilter(startFilter, true);
 
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
+    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
 
-    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
-    int missCount = filter.missCount;
-    assertTrue(missCount > 0);
+    Query constantScore = new ConstantScoreQuery(filter);
     docs = searcher.search(constantScore, 1);
     assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+
+    // make sure we get a cache hit when we reopen reader
+    // that had no change to deletions
+
+    // fake delete (deletes nothing):
+    writer.deleteDocuments(new Term("foo", "bar"));
+
+    IndexReader oldReader = reader;
+    reader = refreshReader(reader);
+    assertTrue(reader == oldReader);
+    int missCount = filter.missCount;
+    docs = searcher.search(constantScore, 1);
+    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+
+    // cache hit:
     assertEquals(missCount, filter.missCount);
 
+    // now delete the doc, refresh the reader, and see that it's not there
+    writer.deleteDocuments(new Term("id", "1"));
+
     // NOTE: important to hold ref here so GC doesn't clear
     // the cache entry!  Else the assert below may sometimes
     // fail:
-    IndexReader oldReader = reader;
+    oldReader = reader;
+    reader = refreshReader(reader);
+
+    searcher = newSearcher(reader, false);
+
+    missCount = filter.missCount;
+    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
+    assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
+
+    // cache miss, because we asked CWF to recache when
+    // deletes changed:
+    assertEquals(missCount+1, filter.missCount);
+    docs = searcher.search(constantScore, 1);
+    assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);
+
+    // apply deletes dynamically:
+    filter = new CachingWrapperFilter(startFilter);
+    writer.addDocument(doc);
+    reader = refreshReader(reader);
+    searcher = newSearcher(reader, false);
+
+    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
+    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
+    missCount = filter.missCount;
+    assertTrue(missCount > 0);
+    constantScore = new ConstantScoreQuery(filter);
+    docs = searcher.search(constantScore, 1);
+    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+    assertEquals(missCount, filter.missCount);
 
     writer.addDocument(doc);
+
+    // NOTE: important to hold ref here so GC doesn't clear
+    // the cache entry!  Else the assert below may sometimes
+    // fail:
+    oldReader = reader;
+    reader = refreshReader(reader);
 
     searcher = newSearcher(reader, false);
 
@@ -216,11 +271,6 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
     assertEquals("[just filter] Should find a hit...", 2, docs.totalHits);
     assertEquals(missCount, filter.missCount);
 
-    // NOTE: important to hold ref here so GC doesn't clear
-    // the cache entry!  Else the assert below may sometimes
-    // fail:
-    IndexReader oldReader2 = reader;
-
     // now delete the doc, refresh the reader, and see that it's not there
     writer.deleteDocuments(new Term("id", "1"));
 
@@ -229,10 +279,12 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
 
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
+    // CWF reused the same entry (it dynamically applied the deletes):
     assertEquals(missCount, filter.missCount);
 
     docs = searcher.search(constantScore, 1);
     assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);
+    // CWF reused the same entry (it dynamically applied the deletes):
     assertEquals(missCount, filter.missCount);
 
     // NOTE: silliness to make sure JRE does not eliminate
     // our ref to oldReader, to prevent
     // CachingWrapperFilter's WeakHashMap from dropping the
     // entry:
     assertTrue(oldReader != null);
-    assertTrue(oldReader2 != null);
 
     reader.close();
     writer.close();
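The test exercises the cached filter through both search paths: as a filter alongside a query, and wrapped in a ConstantScoreQuery so the filter alone produces the hits. In isolation the two calls look like this (a sketch; searcher and cached filter assumed set up as in the test above):

    import java.io.IOException;

    import org.apache.lucene.search.*;

    public class CachedFilterSearchSketch {
      // hits constrained by the filter, scored by the query
      static TopDocs viaQueryPlusFilter(IndexSearcher s, Filter cached) throws IOException {
        return s.search(new MatchAllDocsQuery(), cached, 10);
      }

      // constant scores; hits come purely from the cached filter
      static TopDocs viaConstantScore(IndexSearcher s, Filter cached) throws IOException {
        return s.search(new ConstantScoreQuery(cached), 10);
      }
    }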