Upgraded to Lucene 2.9-dev r794238.

Other changes include:
    LUCENE-1614 - Use Lucene's DocIdSetIterator.NO_MORE_DOCS as the sentinel value.
    LUCENE-1630 - Add acceptsDocsOutOfOrder method to Collector implementations.
    LUCENE-1673, LUCENE-1701 - Trie has moved to Lucene core and renamed to NumericRangeQuery.
    LUCENE-1662, LUCENE-1687 - Replace usage of ExtendedFieldCache by FieldCache.

SOLR-1241: Solr's CharFilter has been moved to Lucene. Remove CharFilter and related classes from Solr and use Lucene's corresponding code.

SOLR-1261: Lucene trunk renamed RangeQuery & Co to TermRangeQuery.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@794328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2009-07-15 17:21:04 +00:00
parent c9eb4ea1ee
commit 03cf5cdad6
43 changed files with 135 additions and 1355 deletions

View File

@ -552,6 +552,17 @@ Other Changes
hitting "/admin/logging.jsp" should switch to "/admin/logging". hitting "/admin/logging.jsp" should switch to "/admin/logging".
(hossman) (hossman)
42. Upgraded to Lucene 2.9-dev r794238. Other changes include:
LUCENE-1614 - Use Lucene's DocIdSetIterator.NO_MORE_DOCS as the sentinel value.
LUCENE-1630 - Add acceptsDocsOutOfOrder method to Collector implementations.
LUCENE-1673, LUCENE-1701 - Trie has moved to Lucene core and renamed to NumericRangeQuery.
LUCENE-1662, LUCENE-1687 - Replace usage of ExtendedFieldCache by FieldCache.
(shalin)
43. SOLR-1241: Solr's CharFilter has been moved to Lucene. Remove CharFilter and related classes
from Solr and use Lucene's corresponding code (koji via shalin)
44. SOLR-1261: Lucene trunk renamed RangeQuery & Co to TermRangeQuery (Uwe Schindler via shalin)
Build Build
---------------------- ----------------------

View File

@ -252,12 +252,12 @@
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- charFilter + "CharStream aware" WhitespaceTokenizer --> <!-- charFilter + WhitespaceTokenizer -->
<!-- <!--
<fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" > <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
<analyzer> <analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.CharStreamAwareWhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
--> -->
@ -347,7 +347,7 @@
<!-- "default" values can be specified for fields, indicating which <!-- "default" values can be specified for fields, indicating which
value should be used if no value is specified when adding a document. value should be used if no value is specified when adding a document.
--> -->
<field name="popularity" type="sint" indexed="true" stored="true" default="0"/> <field name="popularity" type="sint" indexed="true" stored="true"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/> <field name="inStock" type="boolean" indexed="true" stored="true"/>
<!-- Some sample docs exists solely to demonstrate the spellchecker <!-- Some sample docs exists solely to demonstrate the spellchecker

View File

@ -1,2 +1,2 @@
AnyObjectId[2e6629706d0dc36f2a3d6bef6f6bbc2dec9716cb] was removed in git history. AnyObjectId[3913f541b7e2915956524f4fc7ee4254dabc1449] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[e2eea446e51f23e97164689936003016f555f807] was removed in git history. AnyObjectId[51b1184b0a653dbe09561e08cb7bb30936ccdd19] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[c072989b47055f39817417190760c4180da20bd1] was removed in git history. AnyObjectId[8732882f60d8c2c314257d02e1fb35e662313c14] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[358e8188673e16f06243bfd926405a2a5659e0c8] was removed in git history. AnyObjectId[4a6bad8fd3391c2dabdd8762d7fdff47511c8012] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[150d8a5ed1d794a503ded3becfaab7d0ef16a131] was removed in git history. AnyObjectId[89ffe35842473c57edcbecd24a116b6993826ae1] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[4b73ea8711ca57acc3e4ad465e488dbed07880a2] was removed in git history. AnyObjectId[a2210e09cef58fe74c62b3cd67b995263477c999] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[b039d8393c248d6d4bb093137d247d7ee157be68] was removed in git history. AnyObjectId[53b91de6e65f2610ba49f5870efb18df8b6f8398] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[b4ae0e32b791ab75b21d23a27eae798a6fccc499] was removed in git history. AnyObjectId[668555685e6f196033d4aff7aaf22e1913205c23] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,75 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.ArrayList;
import java.util.List;
/**
 * A {@link CharFilter} that keeps a list of offset corrections and uses it
 * to implement {@link #correct(int)}.
 *
 * Subclasses register corrections through {@link #addOffCorrectMap(int, int)};
 * each one pairs an offset with the cumulative difference that should be
 * added to any offset at or beyond it.
 *
 * @version $Id$
 * @since Solr 1.4
 */
public abstract class BaseCharFilter extends CharFilter {

  /** Recorded corrections in insertion order; created lazily on first add. */
  private List<OffCorrectMap> pcmList;

  public BaseCharFilter( CharStream in ){
    super(in);
  }

  /**
   * Applies the recorded corrections to an offset.
   *
   * The list is walked from the most recently added entry backwards, and the
   * first entry whose {@code off} is not greater than {@code currentOff}
   * supplies the difference that is added.
   *
   * @param currentOff the offset to correct
   * @return the corrected offset, or {@code currentOff} unchanged when no
   *         recorded entry applies
   */
  protected int correct( int currentOff ){
    if( pcmList != null ){
      int idx = pcmList.size();
      while( --idx >= 0 ){
        final OffCorrectMap entry = pcmList.get( idx );
        if( entry.off <= currentOff ){
          return currentOff + entry.cumulativeDiff;
        }
      }
    }
    return currentOff;
  }

  /**
   * @return the cumulative difference of the last recorded entry, or 0 when
   *         nothing has been recorded yet
   */
  protected int getLastCumulativeDiff(){
    if( pcmList == null || pcmList.isEmpty() ){
      return 0;
    }
    final OffCorrectMap last = pcmList.get( pcmList.size() - 1 );
    return last.cumulativeDiff;
  }

  /**
   * Appends a correction entry to the list.
   *
   * @param off the offset from which the correction applies
   * @param cumulativeDiff the difference to add to offsets at or after {@code off}
   */
  protected void addOffCorrectMap( int off, int cumulativeDiff ){
    if( pcmList == null ){
      pcmList = new ArrayList<OffCorrectMap>();
    }
    pcmList.add( new OffCorrectMap( off, cumulativeDiff ) );
  }

  /** One (offset, cumulative difference) pair. */
  static class OffCorrectMap {

    int off;
    int cumulativeDiff;

    OffCorrectMap( int off, int cumulativeDiff ){
      this.off = off;
      this.cumulativeDiff = cumulativeDiff;
    }

    /** Renders the pair as {@code (off,cumulativeDiff)}. */
    public String toString(){
      return "(" + off + "," + cumulativeDiff + ")";
    }
  }
}

View File

@ -1,75 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.IOException;
/**
 * Base class for character filters that wrap another {@link CharStream}.
 *
 * Subclasses of CharFilter can be chained to filter CharStream. Every
 * reader operation is forwarded to the wrapped stream; offset correction
 * first applies this filter's own {@link #correct(int)} and then hands the
 * result to the wrapped stream.
 *
 * @version $Id$
 * @since Solr 1.4
 */
public abstract class CharFilter extends CharStream {

  /** The wrapped stream that all operations are forwarded to. */
  protected CharStream input;

  protected CharFilter( CharStream in ){
    this.input = in;
  }

  /**
   * Hook for subclasses that change the text length and therefore need to
   * adjust offsets. This default implementation returns its argument.
   *
   * @param currentOff current offset
   * @return corrected offset
   */
  protected int correct( int currentOff ){
    return currentOff;
  }

  /**
   * Corrects an offset through this filter and then through the wrapped stream.
   *
   * @param currentOff current offset
   * @return the offset after this filter's and the wrapped stream's corrections
   */
  @Override
  public final int correctOffset(int currentOff) {
    final int locallyCorrected = correct( currentOff );
    return input.correctOffset( locallyCorrected );
  }

  /** Reads from the wrapped stream. */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    final int count = input.read(cbuf, off, len);
    return count;
  }

  /** Closes the wrapped stream. */
  @Override
  public void close() throws IOException {
    input.close();
  }

  /** Reports whether the wrapped stream supports mark. */
  public boolean markSupported(){
    final boolean supported = input.markSupported();
    return supported;
  }

  /** Marks the wrapped stream. */
  public void mark( int readAheadLimit ) throws IOException {
    input.mark(readAheadLimit);
  }

  /** Resets the wrapped stream. */
  public void reset() throws IOException {
    input.reset();
  }
}

View File

@ -19,6 +19,8 @@ package org.apache.solr.analysis;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.CharStream;
/** /**
* *
* @version $Id$ * @version $Id$

View File

@ -1,69 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.IOException;
import java.io.Reader;
/**
 * Adapts a plain {@link Reader} to the {@link CharStream} API.
 *
 * All reader operations are forwarded to the wrapped reader, and
 * {@link #correctOffset(int)} returns its argument unchanged. Instances are
 * obtained through {@link #get(Reader)}.
 *
 * @version $Id$
 * @since Solr 1.4
 */
public final class CharReader extends CharStream {

  /** The wrapped reader. */
  protected Reader input;

  /**
   * Returns a CharStream view of the given reader.
   *
   * @param input the reader to expose as a CharStream
   * @return {@code input} itself when it already is a CharStream, otherwise
   *         a new CharReader wrapping it
   */
  public static CharStream get( Reader input ){
    if( input instanceof CharStream ){
      return (CharStream)input;
    }
    return new CharReader(input);
  }

  private CharReader( Reader in ){
    this.input = in;
  }

  /** Returns the offset as given; this class performs no correction. */
  @Override
  public int correctOffset(int currentOff) {
    return currentOff;
  }

  /** Reads from the wrapped reader. */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    final int count = input.read(cbuf, off, len );
    return count;
  }

  /** Closes the wrapped reader. */
  @Override
  public void close() throws IOException {
    input.close();
  }

  /** Reports whether the wrapped reader supports mark. */
  public boolean markSupported(){
    final boolean supported = input.markSupported();
    return supported;
  }

  /** Marks the wrapped reader. */
  public void mark( int readAheadLimit ) throws IOException {
    input.mark(readAheadLimit);
  }

  /** Resets the wrapped reader. */
  public void reset() throws IOException {
    input.reset();
  }
}

View File

@ -1,38 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.Reader;
/**
 * A {@link Reader} that additionally exposes
 * <a href="#correctOffset(int)">correctOffset</a>, letting consumers map an
 * offset in the stream's output to a corrected offset.
 *
 * @version $Id$
 * @since Solr 1.4
 */
public abstract class CharStream extends Reader {

  /**
   * Corrects a token offset. Called by CharFilter(s) and Tokenizer.
   *
   * @param currentOff current offset
   * @return corrected token offset
   */
  public abstract int correctOffset( int currentOff );

}

View File

@ -1,276 +0,0 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Tokenizer;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
/**
 * CJKTokenizer was modified from StopTokenizer which does a decent job for
 * most European languages. For double-byte characters it emits overlapping
 * two-character tokens.<br>
 * Example: "java C1C2C3C4" is segmented into: "java" "C1C2" "C2C3" "C3C4";
 * zero-length tokens are never returned.<br>
 * For ASCII input: letters, digits, '_', '+' and '#' are treated as token
 * characters.<br>
 * For more info on Asian language (Chinese, Japanese, Korean) text segmentation:
 * please search <a
 * href="http://www.google.com/search?q=word+chinese+segment">google</a>
 *
 * <p>LUCENE-973 is applied.
 *
 * <p>This "CharStream aware" variant passes token start/end offsets through
 * {@link CharStream#correctOffset(int)} of its input before returning them.
 *
 * @version $Id$
 * @since Solr 1.4
 *
 */
public final class CharStreamAwareCJKTokenizer extends Tokenizer {
//~ Static fields/initializers ---------------------------------------------
/** Word token type; the initial value of tokenType (index into TOKEN_TYPE_NAMES). */
static final int WORD_TYPE = 0;
/** Token type set when a BASIC_LATIN / HALFWIDTH_AND_FULLWIDTH_FORMS token character is buffered. */
static final int SINGLE_TOKEN_TYPE = 1;
/** Token type set when a letter outside those two Unicode blocks is buffered. */
static final int DOUBLE_TOKEN_TYPE = 2;
/** Names for token types, indexed by the *_TYPE constants above. */
static final String[] TOKEN_TYPE_NAMES = { "word", "single", "double" };
/** Max word length; a "single" token is cut once it reaches this many characters. */
private static final int MAX_WORD_LEN = 255;
/** Size of the buffer used for each read from the input. */
private static final int IO_BUFFER_SIZE = 256;
//~ Instance fields --------------------------------------------------------
/** Position in the input of the next character to be examined; used to compute token start offsets. */
private int offset = 0;
/** Index of the next unread character in ioBuffer. */
private int bufferIndex = 0;
/** Number of characters returned by the last read into ioBuffer; -1 once the input is exhausted. */
private int dataLen = 0;
/**
* Character buffer holding the characters of the token currently being
* assembled.
*/
private final char[] buffer = new char[MAX_WORD_LEN];
/**
* I/O buffer holding the most recent chunk read from the input (one of the
* members of Tokenizer).
*/
private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
/** Type of the token being assembled: one of WORD_TYPE, SINGLE_TOKEN_TYPE, DOUBLE_TOKEN_TYPE. */
private int tokenType = WORD_TYPE;
/**
* True when the previous call emitted a two-character "double" token and
* rewound the reader by one character, so the first character buffered by
* the next call has already appeared in a token:
* "C1C2C3C4" emits C1C2 (C2 re-read), then C2C3 (C3 re-read), then C3C4.
* When the run ends with only that already-emitted character buffered, the
* buffer is discarded instead of being returned.
*/
private boolean preIsTokened = false;
//~ Constructors -----------------------------------------------------------
/**
* Construct a token stream processing the given input.
*
* @param in the CharStream to tokenize; it is also used to correct offsets
*/
public CharStreamAwareCJKTokenizer(CharStream in) {
input = in;
}
//~ Methods ----------------------------------------------------------------
/**
* Returns the next token in the stream, or null at EOS.
* See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
* for detail.
*
* @param reusableToken a reusable token; must not be null
* @return reusableToken re-initialized with the token text, corrected
* start/end offsets and type name, or null when the input is exhausted
*
* @throws java.io.IOException - throw IOException when read error <br>
* happened in the InputStream
*
*/
public final Token next(final Token reusableToken) throws java.io.IOException {
assert reusableToken != null;
// how many character(s) have been stored in buffer
int length = 0;
// the position used to create the Token
int start = offset;
while (true) {
// current character
char c;
// unicode block of the current character
Character.UnicodeBlock ub;
offset++;
// refill the I/O buffer once every buffered character has been consumed
if (bufferIndex >= dataLen) {
dataLen = input.read(ioBuffer);
bufferIndex = 0;
}
if (dataLen == -1) {
// end of input: flush what is buffered, unless it is only a character
// that was already emitted as the tail of the previous bigram
if (length > 0) {
if (preIsTokened == true) {
length = 0;
preIsTokened = false;
}
break;
} else {
return null;
}
} else {
//get current character
c = ioBuffer[bufferIndex++];
//get the UnicodeBlock of the current character
ub = Character.UnicodeBlock.of(c);
}
//if the current character is ASCII or Extend ASCII
if ((ub == Character.UnicodeBlock.BASIC_LATIN)
|| (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
) {
if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
// convert HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN by subtracting 65248 (0xFEE0)
// NOTE(review): the subtraction is applied to every character of the block,
// not only to the fullwidth ASCII variants - confirm this is intended
int i = (int) c;
i = i - 65248;
c = (char) i;
}
// if the current character is a letter, a digit or "_" "+" "#"
if (Character.isLetterOrDigit(c)
|| ((c == '_') || (c == '+') || (c == '#'))
) {
if (length == 0) {
// "javaC1C2C3C4linux"
// ^--: the current character starts a new ASCII token
start = offset - 1;
} else if (tokenType == DOUBLE_TOKEN_TYPE) {
// "javaC1C2C3C4linux"
// the buffer holds non-ASCII character(s) and the current
// character is ASCII: un-read it and end the current token
offset--;
bufferIndex--;
if (preIsTokened == true) {
// only one non-ASCII character is buffered and it was already emitted
length = 0;
preIsTokened = false;
break;
} else {
break;
}
}
// store the LowerCase(c) in the buffer
buffer[length++] = Character.toLowerCase(c);
tokenType = SINGLE_TOKEN_TYPE;
// stop once the token reaches the maximum word length
if (length == MAX_WORD_LEN) {
break;
}
} else if (length > 0) {
// a non-token ASCII character ends the current token
if (preIsTokened == true) {
// the buffered character was already emitted: drop it and keep scanning
length = 0;
preIsTokened = false;
} else {
break;
}
}
} else {
// non-ASCII letter, e.g."C1C2C3C4"
if (Character.isLetter(c)) {
if (length == 0) {
start = offset - 1;
buffer[length++] = c;
tokenType = DOUBLE_TOKEN_TYPE;
} else {
if (tokenType == SINGLE_TOKEN_TYPE) {
// un-read the non-ASCII letter so the next call starts with it
offset--;
bufferIndex--;
//return the previous ASCII characters
break;
} else {
buffer[length++] = c;
tokenType = DOUBLE_TOKEN_TYPE;
if (length == 2) {
// emit the bigram and rewind one character so that the second
// character becomes the first character of the next bigram
offset--;
bufferIndex--;
preIsTokened = true;
break;
}
}
}
} else if (length > 0) {
// a non-letter, non-ASCII character ends the current token
if (preIsTokened == true) {
// empty the buffer
length = 0;
preIsTokened = false;
} else {
break;
}
}
}
}
if (length > 0) {
// Because of "CharStream aware" tokenizer, using correctOffset() to
// correct start/end offsets
return reusableToken.reinit
(buffer, 0, length,
((CharStream)input).correctOffset( start ),
((CharStream)input).correctOffset( start+length ),
TOKEN_TYPE_NAMES[tokenType]);
} else if (dataLen != -1) {
// Don't return an empty string - recurse to get the next token
return next(reusableToken);
} else {
return null;
}
}
}

View File

@ -1,33 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.Reader;
/**
 * Factory for {@link CharStreamAwareCJKTokenizer}.
 *
 * @version $Id$
 * @since Solr 1.4
 */
public class CharStreamAwareCJKTokenizerFactory extends BaseTokenizerFactory {

  /**
   * Creates a tokenizer over the given reader.
   *
   * @param input the text source; it is passed through
   *              {@code CharReader.get} before being handed to the tokenizer
   * @return a new tokenizer reading from {@code input}
   */
  public CharStreamAwareCJKTokenizer create(Reader input) {
    return new CharStreamAwareCJKTokenizer(
        CharReader.get( input ) );
  }
}

View File

@ -1,102 +0,0 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
/**
 * An abstract base class for simple, character-oriented tokenizers.
 *
 * Tokens are maximal runs of characters accepted by
 * {@link #isTokenChar(char)}, cut at {@code MAX_WORD_LEN} characters. Start
 * and end offsets are passed through {@link CharStream#correctOffset(int)}
 * of the input before being stored on the token.
 */
public abstract class CharStreamAwareCharTokenizer extends Tokenizer {

  public CharStreamAwareCharTokenizer(CharStream input) {
    super(input);
  }

  // offset: input position of ioBuffer[0]; bufferIndex: next unread index in
  // ioBuffer; dataLen: number of valid chars currently in ioBuffer.
  private int offset = 0, bufferIndex = 0, dataLen = 0;
  private static final int MAX_WORD_LEN = 255;
  private static final int IO_BUFFER_SIZE = 4096;
  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];

  /** Returns true iff a character should be included in a token.  This
   * tokenizer generates as tokens adjacent sequences of characters which
   * satisfy this predicate.  Characters for which this is false are used to
   * define token boundaries and are not included in tokens. */
  protected abstract boolean isTokenChar(char c);

  /** Called on each token character to normalize it before it is added to the
   * token.  The default implementation does nothing.  Subclasses may use this
   * to, e.g., lowercase tokens. */
  protected char normalize(char c) {
    return c;
  }

  /**
   * Fills {@code reusableToken} with the next token.
   *
   * @param reusableToken the token to fill; must not be null
   * @return {@code reusableToken}, or null when the input is exhausted
   * @throws IOException if reading from the input fails
   */
  public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    reusableToken.clear();
    int length = 0;
    int start = bufferIndex;
    char[] buffer = reusableToken.termBuffer();
    while (true) {

      if (bufferIndex >= dataLen) {
        offset += dataLen;
        dataLen = input.read(ioBuffer);
        if (dataLen == -1) {
          // Reset to 0 so that a later refill attempt's "offset += dataLen"
          // adds nothing instead of decrementing offset by one per call.
          dataLen = 0;
          if (length > 0)
            break;
          else
            return null;
        }
        bufferIndex = 0;
      }

      final char c = ioBuffer[bufferIndex++];

      if (isTokenChar(c)) {               // if it's a token char

        if (length == 0)                  // start of token
          start = offset + bufferIndex - 1;
        else if (length == buffer.length)
          buffer = reusableToken.resizeTermBuffer(1+length);

        buffer[length++] = normalize(c);  // buffer it, normalized

        if (length == MAX_WORD_LEN)       // buffer overflow!
          break;

      } else if (length > 0)              // at non-Letter w/ chars
        break;                            // return 'em
    }

    reusableToken.setTermLength(length);
    // Because of "CharStream aware" tokenizer, using correctOffset() to
    // correct start/end offsets
    reusableToken.setStartOffset(((CharStream)input).correctOffset(start));
    reusableToken.setEndOffset(((CharStream)input).correctOffset(start+length));
    return reusableToken;
  }

  /**
   * Switches to a new input and clears all buffering state so that offsets
   * start again from zero.
   */
  public void reset(Reader input) throws IOException {
    super.reset(input);
    bufferIndex = 0;
    offset = 0;
    dataLen = 0;
  }
}

View File

@ -1,33 +0,0 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A tokenizer that splits text at whitespace: every maximal run of
 * non-whitespace characters becomes one token.
 */
public class CharStreamAwareWhitespaceTokenizer extends CharStreamAwareCharTokenizer {

  /** Construct a new WhitespaceTokenizer over the given stream. */
  public CharStreamAwareWhitespaceTokenizer(CharStream in) {
    super(in);
  }

  /**
   * Accepts every character for which
   * {@link Character#isWhitespace(char)} is false.
   */
  protected boolean isTokenChar(char c) {
    final boolean whitespace = Character.isWhitespace(c);
    return !whitespace;
  }
}

View File

@ -1,33 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.Reader;
/**
 * Factory for {@link CharStreamAwareWhitespaceTokenizer}.
 *
 * @version $Id$
 * @since Solr 1.4
 */
public class CharStreamAwareWhitespaceTokenizerFactory extends BaseTokenizerFactory {

  /**
   * Creates a tokenizer over the given reader.
   *
   * @param input the text source; it is passed through
   *              {@code CharReader.get} before being handed to the tokenizer
   * @return a new tokenizer reading from {@code input}
   */
  public CharStreamAwareWhitespaceTokenizer create(Reader input) {
    return new CharStreamAwareWhitespaceTokenizer(
        CharReader.get( input ) );
  }
}

View File

@ -1,123 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.IOException;
import java.util.LinkedList;
/**
* A char filter that replaces character sequences in its input according to
* a {@link NormalizeMap} and records offset corrections whenever a
* replacement differs in length from the text it replaces.
*
* @version $Id$
* @since Solr 1.4
*
*/
public class MappingCharFilter extends BaseCharFilter {
// root of the mapping trie; its submap is keyed by the first character of each match
private final NormalizeMap normMap;
// characters waiting to be consumed by nextChar() before the underlying input is read
private LinkedList<Character> buffer;
// replacement string currently being emitted, or null if none has been matched yet
private String replacement;
// index of the next character of replacement to return
private int charPointer;
// incremented by nextChar() and decremented by pushChar(); used to compute correction offsets
private int nextCharCounter;
/**
* @param normMap the mapping rules to apply
* @param in the stream to filter
*/
public MappingCharFilter( NormalizeMap normMap, CharStream in ){
super( in );
this.normMap = normMap;
}
/**
* Returns the next output character.
*
* Pending replacement characters are returned first. Otherwise the next
* input character is read; if no mapping starts with it, or no mapping
* matches, it is returned unchanged. On a match the replacement becomes
* pending, offset corrections are recorded when the lengths differ, and the
* loop runs again to emit the replacement.
*
* @return the next character, or -1 at end of input
*/
public int read() throws IOException {
while( true ){
// drain the pending replacement first
if( replacement != null && charPointer < replacement.length() )
return replacement.charAt( charPointer++ );
int firstChar = nextChar();
if( firstChar == -1 ) return -1;
NormalizeMap nm = normMap.submap != null ?
normMap.submap.get( (char)firstChar ) : null;
// no mapping begins with this character
if( nm == null ) return firstChar;
NormalizeMap result = match( nm );
// mappings begin with this character but none matched
if( result == null ) return firstChar;
replacement = result.normStr;
charPointer = 0;
// NormalizeMap.add sets diff = match length - replacement length
if( result.diff != 0 ){
int prevCumulativeDiff = getLastCumulativeDiff();
if( result.diff < 0 ){
// replacement is longer than the match: one correction entry per extra character
for( int i = 0; i < -result.diff ; i++ )
addOffCorrectMap( nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i );
}
else{
// replacement is shorter than the match: a single correction entry
addOffCorrectMap( nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff ) ;
}
}
}
}
// Returns the next pending character from buffer, or reads one from the input
// when buffer is empty; always increments nextCharCounter.
private int nextChar() throws IOException {
nextCharCounter++;
if( buffer != null && !buffer.isEmpty() )
return buffer.removeFirst();
return input.read();
}
// Puts a character back at the front of buffer and undoes the counter
// increment made when it was read.
private void pushChar( int c ){
nextCharCounter--;
if( buffer == null )
buffer = new LinkedList<Character>();
buffer.addFirst( (char)c );
}
// Appends a character to the end of buffer without touching nextCharCounter.
private void pushLastChar( int c ){
if( buffer == null )
buffer = new LinkedList<Character>();
buffer.addLast( (char)c );
}
// Tries to extend the match starting at map with further input characters.
// Deeper matches are tried first, so the longest mapping wins; a character
// that does not lead to a match is pushed back. Returns the matching node,
// or null if neither a deeper node nor map itself carries a replacement.
private NormalizeMap match( NormalizeMap map ) throws IOException {
NormalizeMap result = null;
if( map.submap != null ){
int chr = nextChar();
if( chr != -1 ){
NormalizeMap subMap = map.submap.get( (char)chr );
if( subMap != null ){
result = match( subMap );
}
if( result == null )
pushChar( chr );
}
}
if( result == null && map.normStr != null )
result = map;
return result;
}
/**
* Reads up to len mapped characters into cbuf starting at off.
*
* Up to len characters are first read from the input and appended to the
* pending buffer; the output is then produced one character at a time
* through {@link #read()}.
*
* @return the number of characters stored, or -1 if none were stored
*/
public int read( char[] cbuf, int off, int len ) throws IOException {
char[] tmp = new char[len];
int l = input.read( tmp, 0, len );
if( l != -1 ){
for( int i = 0; i < l; i++ )
pushLastChar( tmp[i] );
}
// l is reused from here on as the count of characters written to cbuf
l = 0;
for( int i = off; i < off + len; i++ ){
int c = read();
if( c == -1 ) break;
cbuf[i] = (char)c;
l++;
}
// NOTE(review): when len is 0 nothing is written and -1 is returned - confirm callers never pass 0
return l == 0 ? -1 : l;
}
}

View File

@ -24,6 +24,9 @@ import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.MappingCharFilter;
import org.apache.lucene.analysis.NormalizeCharMap;
import org.apache.solr.common.ResourceLoader; import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware; import org.apache.solr.util.plugin.ResourceLoaderAware;
@ -37,7 +40,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
public class MappingCharFilterFactory extends BaseCharFilterFactory implements public class MappingCharFilterFactory extends BaseCharFilterFactory implements
ResourceLoaderAware { ResourceLoaderAware {
protected NormalizeMap normMap; protected NormalizeCharMap normMap;
private String mapping; private String mapping;
public void inform(ResourceLoader loader) { public void inform(ResourceLoader loader) {
@ -62,7 +65,7 @@ public class MappingCharFilterFactory extends BaseCharFilterFactory implements
catch( IOException e ){ catch( IOException e ){
throw new RuntimeException( e ); throw new RuntimeException( e );
} }
normMap = new NormalizeMap(); normMap = new NormalizeCharMap();
parseRules( wlist, normMap ); parseRules( wlist, normMap );
} }
} }
@ -74,7 +77,7 @@ public class MappingCharFilterFactory extends BaseCharFilterFactory implements
// "source" => "target" // "source" => "target"
static Pattern p = Pattern.compile( "\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$" ); static Pattern p = Pattern.compile( "\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$" );
protected void parseRules( List<String> rules, NormalizeMap normMap ){ protected void parseRules( List<String> rules, NormalizeCharMap normMap ){
for( String rule : rules ){ for( String rule : rules ){
Matcher m = p.matcher( rule ); Matcher m = p.matcher( rule );
if( !m.find() ) if( !m.find() )

View File

@ -1,55 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.HashMap;
import java.util.Map;
/**
*
* @version $Id$
* @since Solr 1.4
*
*/
public class NormalizeMap {
Map<Character, NormalizeMap> submap;
String normStr;
int diff;
public void add( String singleMatch, String replacement ){
NormalizeMap currMap = this;
for( int i = 0; i < singleMatch.length(); i++ ){
char c = singleMatch.charAt( i );
if( currMap.submap == null ){
currMap.submap = new HashMap<Character, NormalizeMap>( 1 );
}
NormalizeMap map = currMap.submap.get( c );
if( map == null ){
map = new NormalizeMap();
currMap.submap.put( c, map );
}
currMap = map;
}
if( currMap.normStr != null ){
throw new RuntimeException( "MappingCharFilter: there is already a mapping for " + singleMatch );
}
currMap.normStr = replacement;
currMap.diff = singleMatch.length() - replacement.length();
}
}

View File

@ -18,7 +18,8 @@
package org.apache.solr.analysis; package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.analysis.TokenizerFactory; import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.CharReader;
import java.io.Reader; import java.io.Reader;

View File

@ -1,81 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.search.trie.TrieUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.DateField;
import org.apache.solr.schema.TrieField;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
/**
 * Query time tokenizer for trie fields. The whole input is read as a single value, converted
 * according to the configured trie type and prefix coded through TrieUtils; the result is emitted
 * as one keyword token for use in term queries.
 * <p/>
 * Date values are resolved with {@link org.apache.solr.schema.DateField#parseMath} and their
 * millisecond timestamp is prefix coded as a long.
 *
 * @version $Id$
 * @see org.apache.lucene.search.trie.TrieUtils
 * @see org.apache.solr.schema.TrieField
 * @since solr 1.4
 */
public class TrieQueryTokenizerFactory extends BaseTokenizerFactory {
  protected static final DateField dateField = new DateField();
  protected final TrieField.TrieTypes type;

  /**
   * @param type the trie type that decides how query text is parsed and encoded
   */
  public TrieQueryTokenizerFactory(TrieField.TrieTypes type) {
    this.type = type;
  }

  /**
   * Consumes the reader completely and returns a single-token stream holding the prefix coded
   * form of the value it contained.
   *
   * @param reader source of the textual value
   * @return a KeywordTokenizer over the encoded value
   * @throws SolrException if the reader fails or the configured type is not handled
   */
  public TokenStream create(Reader reader) {
    try {
      final String number = readFully(reader);
      final String encoded = encode(number);
      return new KeywordTokenizer(new StringReader(encoded));
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create trie query tokenizer", e);
    }
  }

  /** Drains the reader into a String using a small fixed-size buffer. */
  private static String readFully(Reader reader) throws IOException {
    final StringBuilder text = new StringBuilder();
    final char[] chunk = new char[8];
    for (int read = reader.read(chunk); read != -1; read = reader.read(chunk)) {
      text.append(chunk, 0, read);
    }
    return text.toString();
  }

  /** Parses the text according to the configured type and returns its prefix coded form. */
  private String encode(String number) {
    switch (type) {
      case INTEGER:
        return TrieUtils.intToPrefixCoded(Integer.parseInt(number));
      case FLOAT:
        return TrieUtils.intToPrefixCoded(TrieUtils.floatToSortableInt(Float.parseFloat(number)));
      case LONG:
        return TrieUtils.longToPrefixCoded(Long.parseLong(number));
      case DOUBLE:
        return TrieUtils.longToPrefixCoded(TrieUtils.doubleToSortableLong(Double.parseDouble(number)));
      case DATE:
        return TrieUtils.longToPrefixCoded(dateField.parseMath(null, number).getTime());
      default:
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
    }
  }
}

View File

@ -16,12 +16,8 @@
*/ */
package org.apache.solr.analysis; package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.search.trie.TrieUtils;
import org.apache.lucene.search.trie.IntTrieTokenStream;
import org.apache.lucene.search.trie.LongTrieTokenStream;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.schema.DateField; import org.apache.solr.schema.DateField;
import static org.apache.solr.schema.TrieField.TrieTypes; import static org.apache.solr.schema.TrieField.TrieTypes;
@ -30,22 +26,23 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
/** /**
* Index time tokenizer for trie fields. It uses methods in TrieUtils to create multiple trie encoded string per number. * Tokenizer for trie fields. It uses NumericTokenStream to create multiple trie encoded string per number.
* Each string created by this tokenizer for a given number differs from the previous by the given precisionStep. * Each string created by this tokenizer for a given number differs from the previous by the given precisionStep.
* For query time token streams that only contain the highest precision term, use 32/64 as precisionStep.
* <p/> * <p/>
* Refer to {@linkplain org.apache.lucene.search.trie package description} for more details. * Refer to {@link org.apache.lucene.search.NumericRangeQuery} for more details.
* *
* @version $Id$ * @version $Id$
* @see org.apache.lucene.search.trie.TrieUtils * @see org.apache.lucene.search.NumericRangeQuery
* @see org.apache.solr.schema.TrieField * @see org.apache.solr.schema.TrieField
* @since solr 1.4 * @since solr 1.4
*/ */
public class TrieIndexTokenizerFactory extends BaseTokenizerFactory { public class TrieTokenizerFactory extends BaseTokenizerFactory {
protected static final DateField dateField = new DateField(); protected static final DateField dateField = new DateField();
protected final int precisionStep; protected final int precisionStep;
protected final TrieTypes type; protected final TrieTypes type;
public TrieIndexTokenizerFactory(TrieTypes type, int precisionStep) { public TrieTokenizerFactory(TrieTypes type, int precisionStep) {
this.type = type; this.type = type;
this.precisionStep = precisionStep; this.precisionStep = precisionStep;
} }
@ -59,15 +56,15 @@ public class TrieIndexTokenizerFactory extends BaseTokenizerFactory {
builder.append(buf, 0, len); builder.append(buf, 0, len);
switch (type) { switch (type) {
case INTEGER: case INTEGER:
return new IntTrieTokenStream(Integer.parseInt(builder.toString()), precisionStep); return new NumericTokenStream(precisionStep).setIntValue(Integer.parseInt(builder.toString()));
case FLOAT: case FLOAT:
return new IntTrieTokenStream(TrieUtils.floatToSortableInt(Float.parseFloat(builder.toString())), precisionStep); return new NumericTokenStream(precisionStep).setFloatValue(Float.parseFloat(builder.toString()));
case LONG: case LONG:
return new LongTrieTokenStream(Long.parseLong(builder.toString()), precisionStep); return new NumericTokenStream(precisionStep).setLongValue(Long.parseLong(builder.toString()));
case DOUBLE: case DOUBLE:
return new LongTrieTokenStream(TrieUtils.doubleToSortableLong(Double.parseDouble(builder.toString())), precisionStep); return new NumericTokenStream(precisionStep).setDoubleValue(Double.parseDouble(builder.toString()));
case DATE: case DATE:
return new LongTrieTokenStream(dateField.parseMath(null, builder.toString()).getTime(), precisionStep); return new NumericTokenStream(precisionStep).setLongValue(dateField.parseMath(null, builder.toString()).getTime());
default: default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field"); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
} }

View File

@ -21,8 +21,6 @@ import java.io.IOException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Constants;
/** /**
* Directory provider which mimics original Solr FSDirectory based behavior. * Directory provider which mimics original Solr FSDirectory based behavior.
@ -31,10 +29,6 @@ import org.apache.lucene.util.Constants;
public class StandardDirectoryFactory extends DirectoryFactory { public class StandardDirectoryFactory extends DirectoryFactory {
public Directory open(String path) throws IOException { public Directory open(String path) throws IOException {
if (!Constants.WINDOWS) { return FSDirectory.open(new File(path));
return new NIOFSDirectory(new File(path), null);
}
return new FSDirectory(new File(path), null);
} }
} }

View File

@ -21,12 +21,11 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.search.ExtendedFieldCache.DoubleParser; import org.apache.lucene.search.FieldCache.DoubleParser;
import org.apache.lucene.search.ExtendedFieldCache.LongParser; import org.apache.lucene.search.FieldCache.LongParser;
import org.apache.lucene.search.FieldCache.FloatParser; import org.apache.lucene.search.FieldCache.FloatParser;
import org.apache.lucene.search.FieldCache.IntParser; import org.apache.lucene.search.FieldCache.IntParser;
import org.apache.lucene.search.FieldCache.Parser; import org.apache.lucene.search.FieldCache.Parser;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.ParseException;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
@ -45,7 +44,6 @@ import org.apache.solr.search.*;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import java.io.IOException; import java.io.IOException;
import java.io.Reader;
import java.net.URL; import java.net.URL;
import java.util.*; import java.util.*;
import java.text.Collator; import java.text.Collator;
@ -615,7 +613,7 @@ public class QueryComponent extends SearchComponent
static ScoreDocComparator comparatorLong (final IndexReader reader, final String fieldname, Parser parser) static ScoreDocComparator comparatorLong (final IndexReader reader, final String fieldname, Parser parser)
throws IOException { throws IOException {
final String field = fieldname.intern(); final String field = fieldname.intern();
final long[] fieldOrder = parser == null ? ExtendedFieldCache.EXT_DEFAULT.getLongs(reader, field) : ExtendedFieldCache.EXT_DEFAULT.getLongs(reader, field, (LongParser) parser); final long[] fieldOrder = parser == null ? FieldCache.DEFAULT.getLongs(reader, field) : FieldCache.DEFAULT.getLongs(reader, field, (LongParser) parser);
return new ScoreDocComparator() { return new ScoreDocComparator() {
public final int compare (final ScoreDoc i, final ScoreDoc j) { public final int compare (final ScoreDoc i, final ScoreDoc j) {
@ -680,7 +678,7 @@ public class QueryComponent extends SearchComponent
static ScoreDocComparator comparatorDouble(final IndexReader reader, final String fieldname, Parser parser) static ScoreDocComparator comparatorDouble(final IndexReader reader, final String fieldname, Parser parser)
throws IOException { throws IOException {
final String field = fieldname.intern(); final String field = fieldname.intern();
final double[] fieldOrder = parser == null ? ExtendedFieldCache.EXT_DEFAULT.getDoubles(reader, field) : ExtendedFieldCache.EXT_DEFAULT.getDoubles(reader, field, (DoubleParser) parser); final double[] fieldOrder = parser == null ? FieldCache.DEFAULT.getDoubles(reader, field) : FieldCache.DEFAULT.getDoubles(reader, field, (DoubleParser) parser);
return new ScoreDocComparator () { return new ScoreDocComparator () {
public final int compare (final ScoreDoc i, final ScoreDoc j) { public final int compare (final ScoreDoc i, final ScoreDoc j) {

View File

@ -309,7 +309,7 @@ public class SimpleFacets {
throws IOException { throws IOException {
DocSet hasVal = searcher.getDocSet DocSet hasVal = searcher.getDocSet
(new ConstantScoreRangeQuery(fieldName, null, null, false, false)); (new TermRangeQuery(fieldName, null, null, false, false));
return docs.andNotSize(hasVal); return docs.andNotSize(hasVal);
} }
@ -665,14 +665,13 @@ public class SimpleFacets {
} }
/** /**
* Macro for getting the numDocs of a ConstantScoreRangeQuery over docs * Macro for getting the numDocs of a TermRangeQuery over docs
* @see SolrIndexSearcher#numDocs * @see SolrIndexSearcher#numDocs
* @see ConstantScoreRangeQuery * @see TermRangeQuery
*/ */
protected int rangeCount(String field, String low, String high, protected int rangeCount(String field, String low, String high,
boolean iLow, boolean iHigh) throws IOException { boolean iLow, boolean iHigh) throws IOException {
return searcher.numDocs(new ConstantScoreRangeQuery(field,low,high, return searcher.numDocs(new TermRangeQuery(field,low,high,iLow,iHigh),
iLow,iHigh),
base); base);
} }

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.OrdFieldSource; import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.search.Sorting; import org.apache.solr.search.Sorting;
@ -446,13 +446,12 @@ public abstract class FieldType extends FieldProperties {
* @see org.apache.solr.search.SolrQueryParser#getRangeQuery(String, String, String, boolean) * @see org.apache.solr.search.SolrQueryParser#getRangeQuery(String, String, String, boolean)
*/ */
public Query getRangeQuery(QParser parser, String field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { public Query getRangeQuery(QParser parser, String field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
RangeQuery rangeQuery = new RangeQuery( // constant score mode is now enabled per default
return new TermRangeQuery(
field, field,
part1 == null ? null : toInternal(part1), part1 == null ? null : toInternal(part1),
part2 == null ? null : toInternal(part2), part2 == null ? null : toInternal(part2),
minInclusive, maxInclusive); minInclusive, maxInclusive);
rangeQuery.setConstantScoreRewrite(true);
return rangeQuery;
} }
} }

View File

@ -19,9 +19,8 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.trie.IntTrieRangeQuery; import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.trie.LongTrieRangeQuery; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.trie.TrieUtils;
import org.apache.solr.analysis.*; import org.apache.solr.analysis.*;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.request.TextResponseWriter; import org.apache.solr.request.TextResponseWriter;
@ -33,8 +32,9 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
/** /**
* Provides field types to support for Lucene's Trie Range Queries. See {@linkplain org.apache.lucene.search.trie * Provides field types to support for Lucene's Trie Range Queries.
* package description} for more details. It supports integer, float, long, double and date types. * See {@link org.apache.lucene.search.NumericRangeQuery} for more details.
* It supports integer, float, long, double and date types.
* <p/> * <p/>
* For each number being added to this field, multiple terms are generated as per the algorithm described in the above * For each number being added to this field, multiple terms are generated as per the algorithm described in the above
* link. The possible number of terms increases dramatically with higher precision steps (factor 2^precisionStep). For * link. The possible number of terms increases dramatically with higher precision steps (factor 2^precisionStep). For
@ -46,7 +46,7 @@ import java.util.Map;
* generated, range search will be no faster than any other number field, but sorting will be possible. * generated, range search will be no faster than any other number field, but sorting will be possible.
* *
* @version $Id$ * @version $Id$
* @see org.apache.lucene.search.trie.TrieUtils * @see org.apache.lucene.search.NumericRangeQuery
* @since solr 1.4 * @since solr 1.4
*/ */
public class TrieField extends FieldType { public class TrieField extends FieldType {
@ -81,8 +81,9 @@ public class TrieField extends FieldType {
CharFilterFactory[] filterFactories = new CharFilterFactory[0]; CharFilterFactory[] filterFactories = new CharFilterFactory[0];
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0]; TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
analyzer = new TokenizerChain(filterFactories, new TrieIndexTokenizerFactory(type, precisionStep), tokenFilterFactories); analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories);
queryAnalyzer = new TokenizerChain(filterFactories, new TrieQueryTokenizerFactory(type), tokenFilterFactories); // for query time we only need one token, so we use the biggest possible precisionStep:
queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories);
} }
@Override @Override
@ -107,12 +108,14 @@ public class TrieField extends FieldType {
public SortField getSortField(SchemaField field, boolean top) { public SortField getSortField(SchemaField field, boolean top) {
switch (type) { switch (type) {
case INTEGER: case INTEGER:
return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top);
case FLOAT: case FLOAT:
return TrieUtils.getIntSortField(field.getName(), top); return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top);
case LONG:
case DOUBLE:
case DATE: case DATE:
return TrieUtils.getLongSortField(field.getName(), top); case LONG:
return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top);
case DOUBLE:
return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top);
default: default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
} }
@ -121,15 +124,14 @@ public class TrieField extends FieldType {
public ValueSource getValueSource(SchemaField field) { public ValueSource getValueSource(SchemaField field) {
switch (type) { switch (type) {
case INTEGER: case INTEGER:
return new IntFieldSource(field.getName(), TrieUtils.FIELD_CACHE_INT_PARSER); return new IntFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER);
case FLOAT: case FLOAT:
return new FloatFieldSource(field.getName(), TrieUtils.FIELD_CACHE_FLOAT_PARSER); return new FloatFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER);
case LONG:
return new LongFieldSource(field.getName(), TrieUtils.FIELD_CACHE_LONG_PARSER);
case DOUBLE:
return new DoubleFieldSource(field.getName(), TrieUtils.FIELD_CACHE_DOUBLE_PARSER);
case DATE: case DATE:
return new LongFieldSource(field.getName(), TrieUtils.FIELD_CACHE_LONG_PARSER); case LONG:
return new LongFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER);
case DOUBLE:
return new DoubleFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER);
default: default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
} }
@ -167,31 +169,31 @@ public class TrieField extends FieldType {
Query query = null; Query query = null;
switch (type) { switch (type) {
case INTEGER: case INTEGER:
query = new IntTrieRangeQuery(field, precisionStep, query = NumericRangeQuery.newIntRange(field, precisionStep,
min == null ? null : Integer.parseInt(min), min == null ? null : Integer.parseInt(min),
max == null ? null : Integer.parseInt(max), max == null ? null : Integer.parseInt(max),
minInclusive, maxInclusive); minInclusive, maxInclusive);
break; break;
case FLOAT: case FLOAT:
query = new IntTrieRangeQuery(field, precisionStep, query = NumericRangeQuery.newFloatRange(field, precisionStep,
min == null ? null : TrieUtils.floatToSortableInt(Float.parseFloat(min)), min == null ? null : Float.parseFloat(min),
max == null ? null : TrieUtils.floatToSortableInt(Float.parseFloat(max)), max == null ? null : Float.parseFloat(max),
minInclusive, maxInclusive); minInclusive, maxInclusive);
break; break;
case LONG: case LONG:
query = new LongTrieRangeQuery(field, precisionStep, query = NumericRangeQuery.newLongRange(field, precisionStep,
min == null ? null : Long.parseLong(min), min == null ? null : Long.parseLong(min),
max == null ? null : Long.parseLong(max), max == null ? null : Long.parseLong(max),
minInclusive, maxInclusive); minInclusive, maxInclusive);
break; break;
case DOUBLE: case DOUBLE:
query = new LongTrieRangeQuery(field, precisionStep, query = NumericRangeQuery.newDoubleRange(field, precisionStep,
min == null ? null : TrieUtils.doubleToSortableLong(Double.parseDouble(min)), min == null ? null : Double.parseDouble(min),
max == null ? null : TrieUtils.doubleToSortableLong(Double.parseDouble(max)), max == null ? null : Double.parseDouble(max),
minInclusive, maxInclusive); minInclusive, maxInclusive);
break; break;
case DATE: case DATE:
query = new LongTrieRangeQuery(field, precisionStep, query = NumericRangeQuery.newLongRange(field, precisionStep,
min == null ? null : dateField.parseMath(null, min).getTime(), min == null ? null : dateField.parseMath(null, min).getTime(),
max == null ? null : dateField.parseMath(null, max).getTime(), max == null ? null : dateField.parseMath(null, max).getTime(),
minInclusive, maxInclusive); minInclusive, maxInclusive);

View File

@ -19,6 +19,7 @@ package org.apache.solr.search;
import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetIterator; import org.apache.lucene.util.OpenBitSetIterator;
import org.apache.lucene.search.DocIdSetIterator;
/** /**
* <code>BitDocSet</code> represents an unordered set of Lucene Document Ids * <code>BitDocSet</code> represents an unordered set of Lucene Document Ids
@ -84,7 +85,7 @@ public class BitDocSet extends DocSetBase {
private final OpenBitSetIterator iter = new OpenBitSetIterator(bits); private final OpenBitSetIterator iter = new OpenBitSetIterator(bits);
private int pos = iter.nextDoc(); private int pos = iter.nextDoc();
public boolean hasNext() { public boolean hasNext() {
return pos>=0; return pos != DocIdSetIterator.NO_MORE_DOCS;
} }
public Integer next() { public Integer next() {

View File

@ -84,6 +84,10 @@ class DocSetCollector extends Collector {
public void setNextReader(IndexReader reader, int docBase) throws IOException { public void setNextReader(IndexReader reader, int docBase) throws IOException {
this.base = docBase; this.base = docBase;
} }
public boolean acceptsDocsOutOfOrder() {
return false;
}
} }
class DocSetDelegateCollector extends DocSetCollector { class DocSetDelegateCollector extends DocSetCollector {

View File

@ -317,13 +317,13 @@ public class QueryParsing {
Term t = q.getTerm(); Term t = q.getTerm();
FieldType ft = writeFieldName(t.field(), schema, out, flags); FieldType ft = writeFieldName(t.field(), schema, out, flags);
writeFieldVal(t.text(), ft, out, flags); writeFieldVal(t.text(), ft, out, flags);
} else if (query instanceof ConstantScoreRangeQuery) { } else if (query instanceof TermRangeQuery) {
ConstantScoreRangeQuery q = (ConstantScoreRangeQuery)query; TermRangeQuery q = (TermRangeQuery)query;
String fname = q.getField(); String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags); FieldType ft = writeFieldName(fname, schema, out, flags);
out.append( q.includesLower() ? '[' : '{' ); out.append( q.includesLower() ? '[' : '{' );
String lt = q.getLowerVal(); String lt = q.getLowerTerm();
String ut = q.getUpperVal(); String ut = q.getUpperTerm();
if (lt==null) { if (lt==null) {
out.append('*'); out.append('*');
} else { } else {
@ -339,17 +339,17 @@ public class QueryParsing {
} }
out.append( q.includesUpper() ? ']' : '}' ); out.append( q.includesUpper() ? ']' : '}' );
} else if (query instanceof RangeQuery) { } else if (query instanceof NumericRangeQuery) {
RangeQuery q = (RangeQuery)query; NumericRangeQuery q = (NumericRangeQuery)query;
String fname = q.getField(); String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags); FieldType ft = writeFieldName(fname, schema, out, flags);
out.append( q.isInclusive() ? '[' : '{' ); out.append( q.includesMin() ? '[' : '{' );
Term lt = q.getLowerTerm(); Number lt = q.getMin();
Term ut = q.getUpperTerm(); Number ut = q.getMax();
if (lt==null) { if (lt==null) {
out.append('*'); out.append('*');
} else { } else {
writeFieldVal(lt.text(), ft, out, flags); writeFieldVal(lt.toString(), ft, out, flags);
} }
out.append(" TO "); out.append(" TO ");
@ -357,11 +357,10 @@ public class QueryParsing {
if (ut==null) { if (ut==null) {
out.append('*'); out.append('*');
} else { } else {
writeFieldVal(ut.text(), ft, out, flags); writeFieldVal(ut.toString(), ft, out, flags);
} }
out.append( q.isInclusive() ? ']' : '}' ); out.append( q.includesMax() ? ']' : '}' );
} else if (query instanceof BooleanQuery) { } else if (query instanceof BooleanQuery) {
BooleanQuery q = (BooleanQuery)query; BooleanQuery q = (BooleanQuery)query;
boolean needParens=false; boolean needParens=false;

View File

@ -934,6 +934,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
} }
public void setNextReader(IndexReader reader, int docBase) throws IOException { public void setNextReader(IndexReader reader, int docBase) throws IOException {
} }
public boolean acceptsDocsOutOfOrder() {
return true;
}
}; };
} else { } else {
collector = new Collector() { collector = new Collector() {
@ -948,6 +951,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
} }
public void setNextReader(IndexReader reader, int docBase) throws IOException { public void setNextReader(IndexReader reader, int docBase) throws IOException {
} }
public boolean acceptsDocsOutOfOrder() {
return true;
}
}; };
} }
@ -1051,6 +1057,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
} }
public void setNextReader(IndexReader reader, int docBase) throws IOException { public void setNextReader(IndexReader reader, int docBase) throws IOException {
} }
public boolean acceptsDocsOutOfOrder() {
return false;
}
}); });
} }

View File

@ -18,7 +18,7 @@
package org.apache.solr.search.function; package org.apache.solr.search.function;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.ExtendedFieldCache; import org.apache.lucene.search.FieldCache;
import java.io.IOException; import java.io.IOException;
@ -31,13 +31,13 @@ import java.io.IOException;
*/ */
public class DoubleFieldSource extends FieldCacheSource { public class DoubleFieldSource extends FieldCacheSource {
protected ExtendedFieldCache.DoubleParser parser; protected FieldCache.DoubleParser parser;
public DoubleFieldSource(String field) { public DoubleFieldSource(String field) {
this(field, null); this(field, null);
} }
public DoubleFieldSource(String field, ExtendedFieldCache.DoubleParser parser) { public DoubleFieldSource(String field, FieldCache.DoubleParser parser) {
super(field); super(field);
this.parser = parser; this.parser = parser;
} }
@ -48,8 +48,8 @@ public class DoubleFieldSource extends FieldCacheSource {
public DocValues getValues(IndexReader reader) throws IOException { public DocValues getValues(IndexReader reader) throws IOException {
final double[] arr = (parser == null) ? final double[] arr = (parser == null) ?
((ExtendedFieldCache) cache).getDoubles(reader, field) : ((FieldCache) cache).getDoubles(reader, field) :
((ExtendedFieldCache) cache).getDoubles(reader, field, parser); ((FieldCache) cache).getDoubles(reader, field, parser);
return new DocValues() { return new DocValues() {
public float floatVal(int doc) { public float floatVal(int doc) {
return (float) arr[doc]; return (float) arr[doc];

View File

@ -18,7 +18,6 @@
package org.apache.solr.search.function; package org.apache.solr.search.function;
import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.ExtendedFieldCache;
/** /**
* A base class for ValueSource implementations that retrieve values for * A base class for ValueSource implementations that retrieve values for
@ -28,7 +27,7 @@ import org.apache.lucene.search.ExtendedFieldCache;
*/ */
public abstract class FieldCacheSource extends ValueSource { public abstract class FieldCacheSource extends ValueSource {
protected String field; protected String field;
protected FieldCache cache = ExtendedFieldCache.EXT_DEFAULT; protected FieldCache cache = FieldCache.DEFAULT;
public FieldCacheSource(String field) { public FieldCacheSource(String field) {
this.field=field; this.field=field;

View File

@ -18,7 +18,7 @@
package org.apache.solr.search.function; package org.apache.solr.search.function;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.ExtendedFieldCache; import org.apache.lucene.search.FieldCache;
import java.io.IOException; import java.io.IOException;
@ -32,13 +32,13 @@ import java.io.IOException;
*/ */
public class LongFieldSource extends FieldCacheSource { public class LongFieldSource extends FieldCacheSource {
protected ExtendedFieldCache.LongParser parser; protected FieldCache.LongParser parser;
public LongFieldSource(String field) { public LongFieldSource(String field) {
this(field, null); this(field, null);
} }
public LongFieldSource(String field, ExtendedFieldCache.LongParser parser) { public LongFieldSource(String field, FieldCache.LongParser parser) {
super(field); super(field);
this.parser = parser; this.parser = parser;
} }
@ -49,8 +49,8 @@ public class LongFieldSource extends FieldCacheSource {
public DocValues getValues(IndexReader reader) throws IOException { public DocValues getValues(IndexReader reader) throws IOException {
final long[] arr = (parser == null) ? final long[] arr = (parser == null) ?
((ExtendedFieldCache) cache).getLongs(reader, field) : ((FieldCache) cache).getLongs(reader, field) :
((ExtendedFieldCache) cache).getLongs(reader, field, parser); ((FieldCache) cache).getLongs(reader, field, parser);
return new DocValues() { return new DocValues() {
public float floatVal(int doc) { public float floatVal(int doc) {
return (float) arr[doc]; return (float) arr[doc];

View File

@ -18,7 +18,6 @@
package org.apache.solr.search.function; package org.apache.solr.search.function;
import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.ExtendedFieldCache;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import java.io.IOException; import java.io.IOException;

View File

@ -1,69 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.StringReader;
import junit.framework.TestCase;
/**
 * Checks the value returned by {@code correctOffset(0)} for single and stacked
 * {@link CharFilter} subclasses wrapped around an empty input.
 *
 * <p>{@link CharFilter1} adds 1 in {@code correct} and {@link CharFilter2} adds 2;
 * the assertions below expect those adjustments to add up along the chain.
 */
public class TestCharFilter extends TestCase {

  /** A lone {@link CharFilter1} is expected to map offset 0 to 1. */
  public void testCharFilter1() throws Exception {
    CharStream filtered = new CharFilter1( emptyInput() );
    assertEquals( "corrected offset is invalid", 1, filtered.correctOffset( 0 ) );
  }

  /** A lone {@link CharFilter2} is expected to map offset 0 to 2. */
  public void testCharFilter2() throws Exception {
    CharStream filtered = new CharFilter2( emptyInput() );
    assertEquals( "corrected offset is invalid", 2, filtered.correctOffset( 0 ) );
  }

  /** {@link CharFilter2} stacked on {@link CharFilter1} is expected to map offset 0 to 3. */
  public void testCharFilter12() throws Exception {
    CharStream inner = new CharFilter1( emptyInput() );
    CharStream outer = new CharFilter2( inner );
    assertEquals( "corrected offset is invalid", 3, outer.correctOffset( 0 ) );
  }

  /** Two stacked {@link CharFilter1} instances are expected to map offset 0 to 2. */
  public void testCharFilter11() throws Exception {
    CharStream inner = new CharFilter1( emptyInput() );
    CharStream outer = new CharFilter1( inner );
    assertEquals( "corrected offset is invalid", 2, outer.correctOffset( 0 ) );
  }

  /** Builds the innermost stream every test starts from: a reader over the empty string. */
  private static CharStream emptyInput() {
    return CharReader.get( new StringReader("") );
  }

  /** Filter whose {@code correct} shifts the given offset forward by one. */
  static class CharFilter1 extends CharFilter {

    protected CharFilter1(CharStream in) {
      super(in);
    }

    @Override
    protected int correct(int currentOff) {
      return 1 + currentOff;
    }
  }

  /** Filter whose {@code correct} shifts the given offset forward by two. */
  static class CharFilter2 extends CharFilter {

    protected CharFilter2(CharStream in) {
      super(in);
    }

    @Override
    protected int correct(int currentOff) {
      return 2 + currentOff;
    }
  }
}

View File

@ -1,176 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.StringReader;
import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
/**
 * Exercises {@link MappingCharFilter} with a small {@link NormalizeMap}, comparing the
 * tokens produced by a {@link CharStreamAwareWhitespaceTokenizer} over the filtered
 * stream against expected token specifications.
 *
 * <p>Expected tokens are written in the string form accepted by {@code tokens(String)};
 * judging from the offset tables in the comments below, the last two comma-separated
 * fields are the start and end offsets in the ORIGINAL (unfiltered) input. The second
 * field is presumably the position increment - confirm against {@code BaseTokenTestCase}.
 */
public class TestMappingCharFilter extends BaseTokenTestCase {

  /** Mapping table shared by all tests; rebuilt in {@link #setUp()}. */
  NormalizeMap normMap;

  public void setUp() throws Exception {
    normMap = new NormalizeMap();

    // replacements shorter than the matched text
    normMap.add( "aa", "a" );
    normMap.add( "bbb", "b" );
    normMap.add( "cccc", "cc" );

    // replacement of the same length
    normMap.add( "h", "i" );

    // replacements longer than the matched text
    normMap.add( "j", "jj" );
    normMap.add( "k", "kkk" );
    normMap.add( "ll", "llll" );

    // replacement that removes the matched text entirely
    normMap.add( "empty", "" );
  }

  /** Reads the stream to exhaustion, resets it, and expects the same content again. */
  public void testReaderReset() throws Exception {
    CharStream stream = mappedStream( "x" );
    char[] chars = new char[10];

    int count = stream.read(chars, 0, 10);
    assertEquals( 1, count );
    assertEquals( 'x', chars[0]) ;

    count = stream.read(chars, 0, 10);
    assertEquals( -1, count );

    // rewind
    stream.reset();
    count = stream.read(chars, 0, 10);
    assertEquals( 1, count );
    assertEquals( 'x', chars[0]) ;
  }

  public void testNothingChange() throws Exception {
    checkMapping( "x", "x" );
  }

  public void test1to1() throws Exception {
    checkMapping( "h", "i" );
  }

  public void test1to2() throws Exception {
    checkMapping( "j", "jj,1,0,1" );
  }

  public void test1to3() throws Exception {
    checkMapping( "k", "kkk,1,0,1" );
  }

  public void test2to4() throws Exception {
    checkMapping( "ll", "llll,1,0,2" );
  }

  public void test2to1() throws Exception {
    checkMapping( "aa", "a,1,0,2" );
  }

  public void test3to1() throws Exception {
    checkMapping( "bbb", "b,1,0,3" );
  }

  public void test4to2() throws Exception {
    checkMapping( "cccc", "cc,1,0,4" );
  }

  /** Input that maps to the empty string is expected to yield no tokens at all. */
  public void test5to0() throws Exception {
    List<Token> actual = tokenizeOnWhitespace( mappedStream( "empty" ) );
    assertEquals( 0, actual.size() );
  }

  //
  //                1111111111222
  //      01234567890123456789012
  //(in)  h i j k ll cccc bbb aa
  //
  //                1111111111222
  //      01234567890123456789012
  //(out) i i jj kkk llll cc b a
  //
  //    h, 0, 1 =>    i, 0, 1
  //    i, 2, 3 =>    i, 2, 3
  //    j, 4, 5 =>   jj, 4, 5
  //    k, 6, 7 =>  kkk, 6, 7
  //   ll, 8,10 => llll, 8,10
  // cccc,11,15 =>   cc,11,15
  //  bbb,16,19 =>    b,16,19
  //   aa,20,22 =>    a,20,22
  //
  public void testTokenStream() throws Exception {
    checkMapping( "h i j k ll cccc bbb aa",
        "i,1,0,1 i,1,2,3 jj,1,4,5 kkk,1,6,7 llll,1,8,10 cc,1,11,15 b,1,16,19 a,1,20,22" );
  }

  //
  //
  //        0123456789
  //(in)    aaaa ll h
  //(out-1) aa llll i
  //(out-2) a llllllll i
  //
  // aaaa,0,4 => a,0,4
  //   ll,5,7 => llllllll,5,7
  //    h,8,9 => i,8,9
  public void testChained() throws Exception {
    CharStream firstPass = mappedStream( "aaaa ll h" );
    CharStream secondPass = new MappingCharFilter( normMap, firstPass );
    List<Token> actual = tokenizeOnWhitespace( secondPass );
    List<Token> expected = tokens( "a,1,0,4 llllllll,1,5,7 i,1,8,9" );
    assertTokEqualOff( expected, actual );
  }

  /** Wraps {@code text} in a single {@link MappingCharFilter} that uses {@link #normMap}. */
  private CharStream mappedStream( String text ) throws Exception {
    return new MappingCharFilter( normMap, CharReader.get( new StringReader( text ) ) );
  }

  /** Runs a {@link CharStreamAwareWhitespaceTokenizer} over {@code in} and collects its tokens. */
  private List<Token> tokenizeOnWhitespace( CharStream in ) throws Exception {
    TokenStream tokenizer = new CharStreamAwareWhitespaceTokenizer( in );
    return getTokens( tokenizer );
  }

  /**
   * Filters {@code input} once through the mapping filter, tokenizes it, and compares
   * the result (including offsets) with the tokens described by {@code expectedSpec}.
   */
  private void checkMapping( String input, String expectedSpec ) throws Exception {
    List<Token> actual = tokenizeOnWhitespace( mappedStream( input ) );
    List<Token> expected = tokens( expectedSpec );
    assertTokEqualOff( expected, actual );
  }
}

View File

@ -23,6 +23,7 @@ import java.util.Random;
import java.util.BitSet; import java.util.BitSet;
import org.apache.lucene.util.OpenBitSetIterator; import org.apache.lucene.util.OpenBitSetIterator;
import org.apache.lucene.search.DocIdSetIterator;
/** /**
* @deprecated * @deprecated
@ -62,7 +63,7 @@ public class TestOpenBitSet extends TestCase {
iterator.skipTo(bb+1); iterator.skipTo(bb+1);
bb = iterator.doc(); bb = iterator.doc();
} }
assertEquals(aa,bb); assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
} while (aa>=0); } while (aa>=0);
} }

View File

@ -19,8 +19,8 @@
org.apache.lucene.analysis.Token, org.apache.lucene.analysis.Token,
org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.TokenStream,
org.apache.lucene.index.Payload, org.apache.lucene.index.Payload,
org.apache.solr.analysis.CharReader, org.apache.lucene.analysis.CharReader,
org.apache.solr.analysis.CharStream, org.apache.lucene.analysis.CharStream,
org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain, org.apache.solr.analysis.TokenizerChain,