mirror of https://github.com/apache/lucene.git
Upgraded to Lucene 2.9-dev r794238.
Other changes include:
LUCENE-1614 - Use Lucene's DocIdSetIterator.NO_MORE_DOCS as the sentinel value.
LUCENE-1630 - Add acceptsDocsOutOfOrder method to Collector implementations.
LUCENE-1673, LUCENE-1701 - Trie has moved to Lucene core and renamed to NumericRangeQuery.
LUCENE-1662, LUCENE-1687 - Replace usage of ExtendedFieldCache by FieldCache.
SOLR-1241: Solr's CharFilter has been moved to Lucene. Remove CharFilter and related classes from Solr and use Lucene's corresponding code.
SOLR-1261: Lucene trunk renamed RangeQuery & Co to TermRangeQuery.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@794328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent c9eb4ea1ee
commit 03cf5cdad6
@@ -552,6 +552,17 @@ Other Changes
     hitting "/admin/logging.jsp" should switch to "/admin/logging".
     (hossman)

+42. Upgraded to Lucene 2.9-dev r794238. Other changes include:
+    LUCENE-1614 - Use Lucene's DocIdSetIterator.NO_MORE_DOCS as the sentinel value.
+    LUCENE-1630 - Add acceptsDocsOutOfOrder method to Collector implementations.
+    LUCENE-1673, LUCENE-1701 - Trie has moved to Lucene core and renamed to NumericRangeQuery.
+    LUCENE-1662, LUCENE-1687 - Replace usage of ExtendedFieldCache by FieldCache.
+    (shalin)
+
+42. SOLR-1241: Solr's CharFilter has been moved to Lucene. Remove CharFilter and related classes
+    from Solr and use Lucene's corresponding code (koji via shalin)
+
+43. SOLR-1261: Lucene trunk renamed RangeQuery & Co to TermRangeQuery (Uwe Schindler via shalin)

 Build
 ----------------------
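The range-query rename (SOLR-1261) and the trie move (LUCENE-1673, LUCENE-1701) amount to a small API substitution for callers. A minimal sketch against the Lucene 2.9 API; the field names, bounds and precisionStep below are illustrative values, not taken from this commit:

import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;

public class RangeQueryMigrationSketch {
  public static void main(String[] args) {
    // RangeQuery/ConstantScoreRangeQuery callers move to the renamed class;
    // the field, bounds and inclusive flags carry over unchanged.
    Query textRange = new TermRangeQuery("name", "apple", "banana", true, false);

    // Trie range queries now live in Lucene core as NumericRangeQuery; the
    // static factories replace IntTrieRangeQuery/LongTrieRangeQuery.
    // precisionStep=8 is an arbitrary example value.
    Query priceRange = NumericRangeQuery.newIntRange("price", 8, 0, 100, true, true);

    System.out.println(textRange + " " + priceRange);
  }
}

ConstantScoreRangeQuery users map onto TermRangeQuery directly, since constant-score rewrite is now the default behavior (see the FieldType.getRangeQuery hunk further down).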
@@ -252,12 +252,12 @@
     </analyzer>
   </fieldType>

-    <!-- charFilter + "CharStream aware" WhitespaceTokenizer -->
+    <!-- charFilter + WhitespaceTokenizer -->
     <!--
     <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
       <analyzer>
         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
-        <tokenizer class="solr.CharStreamAwareWhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       </analyzer>
     </fieldType>
     -->

@@ -347,7 +347,7 @@
   <!-- "default" values can be specified for fields, indicating which
        value should be used if no value is specified when adding a document.
    -->
-   <field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
+   <field name="popularity" type="sint" indexed="true" stored="true"/>
    <field name="inStock" type="boolean" indexed="true" stored="true"/>

   <!-- Some sample docs exists solely to demonstrate the spellchecker
@@ -1,2 +1,2 @@
-AnyObjectId[2e6629706d0dc36f2a3d6bef6f6bbc2dec9716cb] was removed in git history.
+AnyObjectId[3913f541b7e2915956524f4fc7ee4254dabc1449] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[e2eea446e51f23e97164689936003016f555f807] was removed in git history.
+AnyObjectId[51b1184b0a653dbe09561e08cb7bb30936ccdd19] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[c072989b47055f39817417190760c4180da20bd1] was removed in git history.
+AnyObjectId[8732882f60d8c2c314257d02e1fb35e662313c14] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[358e8188673e16f06243bfd926405a2a5659e0c8] was removed in git history.
+AnyObjectId[4a6bad8fd3391c2dabdd8762d7fdff47511c8012] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[150d8a5ed1d794a503ded3becfaab7d0ef16a131] was removed in git history.
+AnyObjectId[89ffe35842473c57edcbecd24a116b6993826ae1] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[4b73ea8711ca57acc3e4ad465e488dbed07880a2] was removed in git history.
+AnyObjectId[a2210e09cef58fe74c62b3cd67b995263477c999] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[b039d8393c248d6d4bb093137d247d7ee157be68] was removed in git history.
+AnyObjectId[53b91de6e65f2610ba49f5870efb18df8b6f8398] was removed in git history.
 Apache SVN contains full history.

@@ -1,2 +1,2 @@
-AnyObjectId[b4ae0e32b791ab75b21d23a27eae798a6fccc499] was removed in git history.
+AnyObjectId[668555685e6f196033d4aff7aaf22e1913205c23] was removed in git history.
 Apache SVN contains full history.
@ -1,75 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public abstract class BaseCharFilter extends CharFilter {
|
||||
|
||||
private List<OffCorrectMap> pcmList;
|
||||
|
||||
public BaseCharFilter( CharStream in ){
|
||||
super(in);
|
||||
}
|
||||
|
||||
protected int correct( int currentOff ){
|
||||
if( pcmList == null || pcmList.isEmpty() ) return currentOff;
|
||||
for( int i = pcmList.size() - 1; i >= 0; i-- ){
|
||||
if( currentOff >= pcmList.get( i ).off )
|
||||
return currentOff + pcmList.get( i ).cumulativeDiff;
|
||||
}
|
||||
return currentOff;
|
||||
}
|
||||
|
||||
protected int getLastCumulativeDiff(){
|
||||
return pcmList == null || pcmList.isEmpty() ? 0 : pcmList.get( pcmList.size() - 1 ).cumulativeDiff;
|
||||
}
|
||||
|
||||
protected void addOffCorrectMap( int off, int cumulativeDiff ){
|
||||
if( pcmList == null ) pcmList = new ArrayList<OffCorrectMap>();
|
||||
pcmList.add( new OffCorrectMap( off, cumulativeDiff ) );
|
||||
}
|
||||
|
||||
static class OffCorrectMap {
|
||||
|
||||
int off;
|
||||
int cumulativeDiff;
|
||||
|
||||
OffCorrectMap( int off, int cumulativeDiff ){
|
||||
this.off = off;
|
||||
this.cumulativeDiff = cumulativeDiff;
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append('(');
|
||||
sb.append(off);
|
||||
sb.append(',');
|
||||
sb.append(cumulativeDiff);
|
||||
sb.append(')');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
*
|
||||
* Subclasses of CharFilter can be chained to filter CharStream.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public abstract class CharFilter extends CharStream {
|
||||
|
||||
protected CharStream input;
|
||||
|
||||
protected CharFilter( CharStream in ){
|
||||
input = in;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Subclass may want to override to correct the current offset.
|
||||
*
|
||||
* @param currentOff current offset
|
||||
* @return corrected offset
|
||||
*/
|
||||
protected int correct( int currentOff ){
|
||||
return currentOff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int correctOffset(int currentOff) {
|
||||
return input.correctOffset( correct( currentOff ) );
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
input.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] cbuf, int off, int len) throws IOException {
|
||||
return input.read(cbuf, off, len);
|
||||
}
|
||||
|
||||
public boolean markSupported(){
|
||||
return input.markSupported();
|
||||
}
|
||||
|
||||
public void mark( int readAheadLimit ) throws IOException {
|
||||
input.mark(readAheadLimit);
|
||||
}
|
||||
|
||||
public void reset() throws IOException {
|
||||
input.reset();
|
||||
}
|
||||
}
|
|
@@ -19,6 +19,8 @@ package org.apache.solr.analysis;
 
 import java.util.Map;
 
+import org.apache.lucene.analysis.CharStream;
+
 /**
  * 
  * @version $Id$
@ -1,69 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* CharReader is a Reader wrapper. It reads chars from Reader and outputs CharStream.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public final class CharReader extends CharStream {
|
||||
|
||||
protected Reader input;
|
||||
|
||||
public static CharStream get( Reader input ){
|
||||
return input instanceof CharStream ?
|
||||
(CharStream)input : new CharReader(input);
|
||||
}
|
||||
|
||||
private CharReader( Reader in ){
|
||||
input = in;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int correctOffset(int currentOff) {
|
||||
return currentOff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
input.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] cbuf, int off, int len) throws IOException {
|
||||
return input.read(cbuf, off, len );
|
||||
}
|
||||
|
||||
public boolean markSupported(){
|
||||
return input.markSupported();
|
||||
}
|
||||
|
||||
public void mark( int readAheadLimit ) throws IOException {
|
||||
input.mark(readAheadLimit);
|
||||
}
|
||||
|
||||
public void reset() throws IOException {
|
||||
input.reset();
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* CharStream adds <a href="#correctOffset(int)">correctOffset</a> functionality over Reader.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public abstract class CharStream extends Reader {
|
||||
|
||||
/**
|
||||
* called by CharFilter(s) and Tokenizer to correct token offset.
|
||||
*
|
||||
* @param currentOff current offset
|
||||
* @return corrected token offset
|
||||
*/
|
||||
public abstract int correctOffset( int currentOff );
|
||||
}
|
|
@ -1,276 +0,0 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
|
||||
/**
|
||||
* CJKTokenizer was modified from StopTokenizer which does a decent job for
|
||||
* most European languages. It performs other token methods for double-byte
|
||||
* Characters: the token will return at each two characters with overlap match.<br>
|
||||
* Example: "java C1C2C3C4" will be segment to: "java" "C1C2" "C2C3" "C3C4" it
|
||||
* also need filter filter zero length token ""<br>
|
||||
* for Digit: digit, '+', '#' will token as letter<br>
|
||||
* for more info on Asia language(Chinese Japanese Korean) text segmentation:
|
||||
* please search <a
|
||||
* href="http://www.google.com/search?q=word+chinese+segment">google</a>
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* LUCENE-973 is applied
|
||||
*/
|
||||
/**
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public final class CharStreamAwareCJKTokenizer extends Tokenizer {
|
||||
//~ Static fields/initializers ---------------------------------------------
|
||||
/** Word token type */
|
||||
static final int WORD_TYPE = 0;
|
||||
|
||||
/** Single byte token type */
|
||||
static final int SINGLE_TOKEN_TYPE = 1;
|
||||
|
||||
/** Double byte token type */
|
||||
static final int DOUBLE_TOKEN_TYPE = 2;
|
||||
|
||||
/** Names for token types */
|
||||
static final String[] TOKEN_TYPE_NAMES = { "word", "single", "double" };
|
||||
|
||||
/** Max word length */
|
||||
private static final int MAX_WORD_LEN = 255;
|
||||
|
||||
/** buffer size: */
|
||||
private static final int IO_BUFFER_SIZE = 256;
|
||||
|
||||
//~ Instance fields --------------------------------------------------------
|
||||
|
||||
/** word offset, used to imply which character(in ) is parsed */
|
||||
private int offset = 0;
|
||||
|
||||
/** the index used only for ioBuffer */
|
||||
private int bufferIndex = 0;
|
||||
|
||||
/** data length */
|
||||
private int dataLen = 0;
|
||||
|
||||
/**
|
||||
* character buffer, store the characters which are used to compose <br>
|
||||
* the returned Token
|
||||
*/
|
||||
private final char[] buffer = new char[MAX_WORD_LEN];
|
||||
|
||||
/**
|
||||
* I/O buffer, used to store the content of the input(one of the <br>
|
||||
* members of Tokenizer)
|
||||
*/
|
||||
private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
|
||||
|
||||
/** word type: single=>ASCII double=>non-ASCII word=>default */
|
||||
private int tokenType = WORD_TYPE;
|
||||
|
||||
/**
|
||||
* tag: previous character is a cached double-byte character "C1C2C3C4"
|
||||
* ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened)
|
||||
* C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4"
|
||||
*/
|
||||
private boolean preIsTokened = false;
|
||||
|
||||
//~ Constructors -----------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Construct a token stream processing the given input.
|
||||
*
|
||||
* @param in I/O reader
|
||||
*/
|
||||
public CharStreamAwareCJKTokenizer(CharStream in) {
|
||||
input = in;
|
||||
}
|
||||
|
||||
//~ Methods ----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the next token in the stream, or null at EOS.
|
||||
* See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
|
||||
* for detail.
|
||||
*
|
||||
* @param reusableToken a reusable token
|
||||
* @return Token
|
||||
*
|
||||
* @throws java.io.IOException - throw IOException when read error <br>
|
||||
* happened in the InputStream
|
||||
*
|
||||
*/
|
||||
public final Token next(final Token reusableToken) throws java.io.IOException {
|
||||
/** how many character(s) has been stored in buffer */
|
||||
assert reusableToken != null;
|
||||
int length = 0;
|
||||
|
||||
/** the position used to create Token */
|
||||
int start = offset;
|
||||
|
||||
while (true) {
|
||||
/** current character */
|
||||
char c;
|
||||
|
||||
/** unicode block of current character for detail */
|
||||
Character.UnicodeBlock ub;
|
||||
|
||||
offset++;
|
||||
|
||||
if (bufferIndex >= dataLen) {
|
||||
dataLen = input.read(ioBuffer);
|
||||
bufferIndex = 0;
|
||||
}
|
||||
|
||||
if (dataLen == -1) {
|
||||
if (length > 0) {
|
||||
if (preIsTokened == true) {
|
||||
length = 0;
|
||||
preIsTokened = false;
|
||||
}
|
||||
|
||||
break;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
//get current character
|
||||
c = ioBuffer[bufferIndex++];
|
||||
|
||||
//get the UnicodeBlock of the current character
|
||||
ub = Character.UnicodeBlock.of(c);
|
||||
}
|
||||
|
||||
//if the current character is ASCII or Extend ASCII
|
||||
if ((ub == Character.UnicodeBlock.BASIC_LATIN)
|
||||
|| (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
|
||||
) {
|
||||
if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
|
||||
// convert HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
|
||||
int i = (int) c;
|
||||
i = i - 65248;
|
||||
c = (char) i;
|
||||
}
|
||||
|
||||
// if the current character is a letter or "_" "+" "#"
|
||||
if (Character.isLetterOrDigit(c)
|
||||
|| ((c == '_') || (c == '+') || (c == '#'))
|
||||
) {
|
||||
if (length == 0) {
|
||||
// "javaC1C2C3C4linux" <br>
|
||||
// ^--: the current character begin to token the ASCII
|
||||
// letter
|
||||
start = offset - 1;
|
||||
} else if (tokenType == DOUBLE_TOKEN_TYPE) {
|
||||
// "javaC1C2C3C4linux" <br>
|
||||
// ^--: the previous non-ASCII
|
||||
// : the current character
|
||||
offset--;
|
||||
bufferIndex--;
|
||||
|
||||
if (preIsTokened == true) {
|
||||
// there is only one non-ASCII has been stored
|
||||
length = 0;
|
||||
preIsTokened = false;
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// store the LowerCase(c) in the buffer
|
||||
buffer[length++] = Character.toLowerCase(c);
|
||||
tokenType = SINGLE_TOKEN_TYPE;
|
||||
|
||||
// break the procedure if buffer overflowed!
|
||||
if (length == MAX_WORD_LEN) {
|
||||
break;
|
||||
}
|
||||
} else if (length > 0) {
|
||||
if (preIsTokened == true) {
|
||||
length = 0;
|
||||
preIsTokened = false;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// non-ASCII letter, e.g."C1C2C3C4"
|
||||
if (Character.isLetter(c)) {
|
||||
if (length == 0) {
|
||||
start = offset - 1;
|
||||
buffer[length++] = c;
|
||||
tokenType = DOUBLE_TOKEN_TYPE;
|
||||
} else {
|
||||
if (tokenType == SINGLE_TOKEN_TYPE) {
|
||||
offset--;
|
||||
bufferIndex--;
|
||||
|
||||
//return the previous ASCII characters
|
||||
break;
|
||||
} else {
|
||||
buffer[length++] = c;
|
||||
tokenType = DOUBLE_TOKEN_TYPE;
|
||||
|
||||
if (length == 2) {
|
||||
offset--;
|
||||
bufferIndex--;
|
||||
preIsTokened = true;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (length > 0) {
|
||||
if (preIsTokened == true) {
|
||||
// empty the buffer
|
||||
length = 0;
|
||||
preIsTokened = false;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (length > 0) {
|
||||
// Because of "CharStream aware" tokenizer, using correctOffset() to
|
||||
// correct start/end offsets
|
||||
return reusableToken.reinit
|
||||
(buffer, 0, length,
|
||||
((CharStream)input).correctOffset( start ),
|
||||
((CharStream)input).correctOffset( start+length ),
|
||||
TOKEN_TYPE_NAMES[tokenType]);
|
||||
} else if (dataLen != -1) {
|
||||
// Don't return an empty string - recurse to get the next token
|
||||
return next(reusableToken);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public class CharStreamAwareCJKTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
||||
public CharStreamAwareCJKTokenizer create(Reader input) {
|
||||
return new CharStreamAwareCJKTokenizer( CharReader.get(input) );
|
||||
}
|
||||
}
|
|
@ -1,102 +0,0 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
/** An abstract base class for simple, character-oriented tokenizers.*/
|
||||
public abstract class CharStreamAwareCharTokenizer extends Tokenizer {
|
||||
public CharStreamAwareCharTokenizer(CharStream input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
private int offset = 0, bufferIndex = 0, dataLen = 0;
|
||||
private static final int MAX_WORD_LEN = 255;
|
||||
private static final int IO_BUFFER_SIZE = 4096;
|
||||
private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
|
||||
|
||||
/** Returns true iff a character should be included in a token. This
|
||||
* tokenizer generates as tokens adjacent sequences of characters which
|
||||
* satisfy this predicate. Characters for which this is false are used to
|
||||
* define token boundaries and are not included in tokens. */
|
||||
protected abstract boolean isTokenChar(char c);
|
||||
|
||||
/** Called on each token character to normalize it before it is added to the
|
||||
* token. The default implementation does nothing. Subclasses may use this
|
||||
* to, e.g., lowercase tokens. */
|
||||
protected char normalize(char c) {
|
||||
return c;
|
||||
}
|
||||
|
||||
public final Token next(final Token reusableToken) throws IOException {
|
||||
assert reusableToken != null;
|
||||
reusableToken.clear();
|
||||
int length = 0;
|
||||
int start = bufferIndex;
|
||||
char[] buffer = reusableToken.termBuffer();
|
||||
while (true) {
|
||||
|
||||
if (bufferIndex >= dataLen) {
|
||||
offset += dataLen;
|
||||
dataLen = input.read(ioBuffer);
|
||||
if (dataLen == -1) {
|
||||
if (length > 0)
|
||||
break;
|
||||
else
|
||||
return null;
|
||||
}
|
||||
bufferIndex = 0;
|
||||
}
|
||||
|
||||
final char c = ioBuffer[bufferIndex++];
|
||||
|
||||
if (isTokenChar(c)) { // if it's a token char
|
||||
|
||||
if (length == 0) // start of token
|
||||
start = offset + bufferIndex - 1;
|
||||
else if (length == buffer.length)
|
||||
buffer = reusableToken.resizeTermBuffer(1+length);
|
||||
|
||||
buffer[length++] = normalize(c); // buffer it, normalized
|
||||
|
||||
if (length == MAX_WORD_LEN) // buffer overflow!
|
||||
break;
|
||||
|
||||
} else if (length > 0) // at non-Letter w/ chars
|
||||
break; // return 'em
|
||||
}
|
||||
|
||||
reusableToken.setTermLength(length);
|
||||
// Because of "CharStream aware" tokenizer, using correctOffset() to
|
||||
// correct start/end offsets
|
||||
reusableToken.setStartOffset(((CharStream)input).correctOffset(start));
|
||||
reusableToken.setEndOffset(((CharStream)input).correctOffset(start+length));
|
||||
return reusableToken;
|
||||
}
|
||||
|
||||
public void reset(Reader input) throws IOException {
|
||||
super.reset(input);
|
||||
bufferIndex = 0;
|
||||
offset = 0;
|
||||
dataLen = 0;
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
|
||||
* Adjacent sequences of non-Whitespace characters form tokens. */
|
||||
public class CharStreamAwareWhitespaceTokenizer extends CharStreamAwareCharTokenizer {
|
||||
/** Construct a new WhitespaceTokenizer. */
|
||||
public CharStreamAwareWhitespaceTokenizer(CharStream in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
/** Collects only characters which do not satisfy
|
||||
* {@link Character#isWhitespace(char)}.*/
|
||||
protected boolean isTokenChar(char c) {
|
||||
return !Character.isWhitespace(c);
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public class CharStreamAwareWhitespaceTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
||||
public CharStreamAwareWhitespaceTokenizer create(Reader input) {
|
||||
return new CharStreamAwareWhitespaceTokenizer( CharReader.get(input) );
|
||||
}
|
||||
}
|
|
@ -1,123 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public class MappingCharFilter extends BaseCharFilter {
|
||||
|
||||
private final NormalizeMap normMap;
|
||||
private LinkedList<Character> buffer;
|
||||
private String replacement;
|
||||
private int charPointer;
|
||||
private int nextCharCounter;
|
||||
|
||||
public MappingCharFilter( NormalizeMap normMap, CharStream in ){
|
||||
super( in );
|
||||
this.normMap = normMap;
|
||||
}
|
||||
|
||||
public int read() throws IOException {
|
||||
while( true ){
|
||||
if( replacement != null && charPointer < replacement.length() )
|
||||
return replacement.charAt( charPointer++ );
|
||||
|
||||
int firstChar = nextChar();
|
||||
if( firstChar == -1 ) return -1;
|
||||
NormalizeMap nm = normMap.submap != null ?
|
||||
normMap.submap.get( (char)firstChar ) : null;
|
||||
if( nm == null ) return firstChar;
|
||||
NormalizeMap result = match( nm );
|
||||
if( result == null ) return firstChar;
|
||||
replacement = result.normStr;
|
||||
charPointer = 0;
|
||||
if( result.diff != 0 ){
|
||||
int prevCumulativeDiff = getLastCumulativeDiff();
|
||||
if( result.diff < 0 ){
|
||||
for( int i = 0; i < -result.diff ; i++ )
|
||||
addOffCorrectMap( nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i );
|
||||
}
|
||||
else{
|
||||
addOffCorrectMap( nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff ) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int nextChar() throws IOException {
|
||||
nextCharCounter++;
|
||||
if( buffer != null && !buffer.isEmpty() )
|
||||
return buffer.removeFirst();
|
||||
return input.read();
|
||||
}
|
||||
|
||||
private void pushChar( int c ){
|
||||
nextCharCounter--;
|
||||
if( buffer == null )
|
||||
buffer = new LinkedList<Character>();
|
||||
buffer.addFirst( (char)c );
|
||||
}
|
||||
|
||||
private void pushLastChar( int c ){
|
||||
if( buffer == null )
|
||||
buffer = new LinkedList<Character>();
|
||||
buffer.addLast( (char)c );
|
||||
}
|
||||
|
||||
private NormalizeMap match( NormalizeMap map ) throws IOException {
|
||||
NormalizeMap result = null;
|
||||
if( map.submap != null ){
|
||||
int chr = nextChar();
|
||||
if( chr != -1 ){
|
||||
NormalizeMap subMap = map.submap.get( (char)chr );
|
||||
if( subMap != null ){
|
||||
result = match( subMap );
|
||||
}
|
||||
if( result == null )
|
||||
pushChar( chr );
|
||||
}
|
||||
}
|
||||
if( result == null && map.normStr != null )
|
||||
result = map;
|
||||
return result;
|
||||
}
|
||||
|
||||
public int read( char[] cbuf, int off, int len ) throws IOException {
|
||||
char[] tmp = new char[len];
|
||||
int l = input.read( tmp, 0, len );
|
||||
if( l != -1 ){
|
||||
for( int i = 0; i < l; i++ )
|
||||
pushLastChar( tmp[i] );
|
||||
}
|
||||
l = 0;
|
||||
for( int i = off; i < off + len; i++ ){
|
||||
int c = read();
|
||||
if( c == -1 ) break;
|
||||
cbuf[i] = (char)c;
|
||||
l++;
|
||||
}
|
||||
return l == 0 ? -1 : l;
|
||||
}
|
||||
}
|
|
@@ -24,6 +24,9 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MappingCharFilter;
+import org.apache.lucene.analysis.NormalizeCharMap;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;

@@ -37,7 +40,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
 public class MappingCharFilterFactory extends BaseCharFilterFactory implements
     ResourceLoaderAware {
 
-  protected NormalizeMap normMap;
+  protected NormalizeCharMap normMap;
   private String mapping;
 
   public void inform(ResourceLoader loader) {

@@ -62,7 +65,7 @@ public class MappingCharFilterFactory extends BaseCharFilterFactory implements
       catch( IOException e ){
         throw new RuntimeException( e );
       }
-      normMap = new NormalizeMap();
+      normMap = new NormalizeCharMap();
       parseRules( wlist, normMap );
     }
   }

@@ -73,8 +76,8 @@ public class MappingCharFilterFactory extends BaseCharFilterFactory implements
 
  // "source" => "target"
  static Pattern p = Pattern.compile( "\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$" );
 
-  protected void parseRules( List<String> rules, NormalizeMap normMap ){
+  protected void parseRules( List<String> rules, NormalizeCharMap normMap ){
    for( String rule : rules ){
      Matcher m = p.matcher( rule );
      if( !m.find() )
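With SOLR-1241 the factory now delegates to Lucene's own char-filter classes. A minimal usage sketch of the Lucene 2.9 NormalizeCharMap/MappingCharFilter pair; the single mapping rule and the input string are arbitrary examples, not from the shipped mapping-ISOLatin1Accent.txt:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MappingCharFilter;
import org.apache.lucene.analysis.NormalizeCharMap;

public class MappingCharFilterSketch {
  public static void main(String[] args) throws IOException {
    // Equivalent of one "source" => "target" rule from a mapping file.
    NormalizeCharMap map = new NormalizeCharMap();
    map.add("\u00e9", "e");

    // CharReader wraps a plain Reader into a CharStream so token offsets can be corrected.
    MappingCharFilter filter =
        new MappingCharFilter(map, CharReader.get(new StringReader("caf\u00e9")));

    char[] buf = new char[64];
    int len = filter.read(buf, 0, buf.length);
    System.out.println(new String(buf, 0, len)); // prints "cafe"
  }
}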
@ -1,55 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public class NormalizeMap {
|
||||
|
||||
Map<Character, NormalizeMap> submap;
|
||||
String normStr;
|
||||
int diff;
|
||||
|
||||
public void add( String singleMatch, String replacement ){
|
||||
NormalizeMap currMap = this;
|
||||
for( int i = 0; i < singleMatch.length(); i++ ){
|
||||
char c = singleMatch.charAt( i );
|
||||
if( currMap.submap == null ){
|
||||
currMap.submap = new HashMap<Character, NormalizeMap>( 1 );
|
||||
}
|
||||
NormalizeMap map = currMap.submap.get( c );
|
||||
if( map == null ){
|
||||
map = new NormalizeMap();
|
||||
currMap.submap.put( c, map );
|
||||
}
|
||||
currMap = map;
|
||||
}
|
||||
if( currMap.normStr != null ){
|
||||
throw new RuntimeException( "MappingCharFilter: there is already a mapping for " + singleMatch );
|
||||
}
|
||||
currMap.normStr = replacement;
|
||||
currMap.diff = singleMatch.length() - replacement.length();
|
||||
}
|
||||
}
|
|
@@ -18,7 +18,8 @@
 package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.solr.analysis.TokenizerFactory;
+import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharReader;
 
 import java.io.Reader;
 
@ -1,81 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.KeywordTokenizer;
|
||||
import org.apache.lucene.search.trie.TrieUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.schema.DateField;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* Query time tokenizer for trie fields. It uses methods in TrieUtils to create a prefix coded representation of the
|
||||
* given number which is used for term queries.
|
||||
* <p/>
|
||||
* Note that queries on trie date types are not tokenized and returned as is.
|
||||
*
|
||||
* @version $Id$
|
||||
* @see org.apache.lucene.search.trie.TrieUtils
|
||||
* @see org.apache.solr.schema.TrieField
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class TrieQueryTokenizerFactory extends BaseTokenizerFactory {
|
||||
protected static final DateField dateField = new DateField();
|
||||
protected final TrieField.TrieTypes type;
|
||||
|
||||
public TrieQueryTokenizerFactory(TrieField.TrieTypes type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public TokenStream create(Reader reader) {
|
||||
try {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
char[] buf = new char[8];
|
||||
int len;
|
||||
while ((len = reader.read(buf)) != -1)
|
||||
builder.append(buf, 0, len);
|
||||
String value, number = builder.toString();
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
value = TrieUtils.intToPrefixCoded(Integer.parseInt(number));
|
||||
break;
|
||||
case FLOAT:
|
||||
value = TrieUtils.intToPrefixCoded(TrieUtils.floatToSortableInt(Float.parseFloat(number)));
|
||||
break;
|
||||
case LONG:
|
||||
value = TrieUtils.longToPrefixCoded(Long.parseLong(number));
|
||||
break;
|
||||
case DOUBLE:
|
||||
value = TrieUtils.longToPrefixCoded(TrieUtils.doubleToSortableLong(Double.parseDouble(number)));
|
||||
break;
|
||||
case DATE:
|
||||
value = TrieUtils.longToPrefixCoded(dateField.parseMath(null, number).getTime());
|
||||
break;
|
||||
default:
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
|
||||
}
|
||||
return new KeywordTokenizer(new StringReader(value));
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create trie query tokenizer", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -16,12 +16,8 @@
  */
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.search.trie.TrieUtils;
-import org.apache.lucene.search.trie.IntTrieTokenStream;
-import org.apache.lucene.search.trie.LongTrieTokenStream;
+import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.schema.DateField;
 import static org.apache.solr.schema.TrieField.TrieTypes;

@@ -30,22 +26,23 @@ import java.io.IOException;
 import java.io.Reader;
 
 /**
- * Index time tokenizer for trie fields. It uses methods in TrieUtils to create multiple trie encoded string per number.
+ * Tokenizer for trie fields. It uses NumericTokenStream to create multiple trie encoded string per number.
  * Each string created by this tokenizer for a given number differs from the previous by the given precisionStep.
+ * For query time token streams that only contain the highest precision term, use 32/64 as precisionStep.
  * <p/>
- * Refer to {@linkplain org.apache.lucene.search.trie package description} for more details.
+ * Refer to {@link org.apache.lucene.search.NumericRangeQuery} for more details.
  *
  * @version $Id$
- * @see org.apache.lucene.search.trie.TrieUtils
+ * @see org.apache.lucene.search.NumericRangeQuery
  * @see org.apache.solr.schema.TrieField
  * @since solr 1.4
  */
-public class TrieIndexTokenizerFactory extends BaseTokenizerFactory {
+public class TrieTokenizerFactory extends BaseTokenizerFactory {
   protected static final DateField dateField = new DateField();
   protected final int precisionStep;
   protected final TrieTypes type;
 
-  public TrieIndexTokenizerFactory(TrieTypes type, int precisionStep) {
+  public TrieTokenizerFactory(TrieTypes type, int precisionStep) {
     this.type = type;
     this.precisionStep = precisionStep;
   }

@@ -59,15 +56,15 @@ public class TrieIndexTokenizerFactory extends BaseTokenizerFactory {
       builder.append(buf, 0, len);
       switch (type) {
         case INTEGER:
-          return new IntTrieTokenStream(Integer.parseInt(builder.toString()), precisionStep);
+          return new NumericTokenStream(precisionStep).setIntValue(Integer.parseInt(builder.toString()));
         case FLOAT:
-          return new IntTrieTokenStream(TrieUtils.floatToSortableInt(Float.parseFloat(builder.toString())), precisionStep);
+          return new NumericTokenStream(precisionStep).setFloatValue(Float.parseFloat(builder.toString()));
         case LONG:
-          return new LongTrieTokenStream(Long.parseLong(builder.toString()), precisionStep);
+          return new NumericTokenStream(precisionStep).setLongValue(Long.parseLong(builder.toString()));
         case DOUBLE:
-          return new LongTrieTokenStream(TrieUtils.doubleToSortableLong(Double.parseDouble(builder.toString())), precisionStep);
+          return new NumericTokenStream(precisionStep).setDoubleValue(Double.parseDouble(builder.toString()));
         case DATE:
-          return new LongTrieTokenStream(dateField.parseMath(null, builder.toString()).getTime(), precisionStep);
+          return new NumericTokenStream(precisionStep).setLongValue(dateField.parseMath(null, builder.toString()).getTime());
         default:
           throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
       }
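As a rough illustration of what the rewritten factory produces per value, this sketch drives the Lucene 2.9 NumericTokenStream directly; the precisionStep and the value are arbitrary examples:

import org.apache.lucene.analysis.NumericTokenStream;

public class NumericTokenStreamSketch {
  public static void main(String[] args) throws Exception {
    // One stream per value; each generated token drops more low-order bits,
    // which is what NumericRangeQuery later exploits at query time.
    NumericTokenStream stream = new NumericTokenStream(8).setLongValue(1234567890L);
    int tokens = 0;
    while (stream.incrementToken()) {
      tokens++;
    }
    // A 64-bit value with precisionStep 8 yields 8 terms of decreasing precision.
    System.out.println(tokens + " terms indexed for one long value");
  }
}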
@@ -21,8 +21,6 @@ import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.NIOFSDirectory;
-import org.apache.lucene.util.Constants;
 
 /**
  * Directory provider which mimics original Solr FSDirectory based behavior.

@@ -31,10 +29,6 @@ import org.apache.lucene.util.Constants;
 public class StandardDirectoryFactory extends DirectoryFactory {
 
   public Directory open(String path) throws IOException {
-    if (!Constants.WINDOWS) {
-      return new NIOFSDirectory(new File(path), null);
-    }
-
-    return new FSDirectory(new File(path), null);
+    return FSDirectory.open(new File(path));
   }
 }
@@ -21,12 +21,11 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.*;
-import org.apache.lucene.search.ExtendedFieldCache.DoubleParser;
-import org.apache.lucene.search.ExtendedFieldCache.LongParser;
+import org.apache.lucene.search.FieldCache.DoubleParser;
+import org.apache.lucene.search.FieldCache.LongParser;
 import org.apache.lucene.search.FieldCache.FloatParser;
 import org.apache.lucene.search.FieldCache.IntParser;
 import org.apache.lucene.search.FieldCache.Parser;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;

@@ -45,7 +44,6 @@ import org.apache.solr.search.*;
 import org.apache.solr.util.SolrPluginUtils;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.net.URL;
 import java.util.*;
 import java.text.Collator;

@@ -615,7 +613,7 @@ public class QueryComponent extends SearchComponent
   static ScoreDocComparator comparatorLong (final IndexReader reader, final String fieldname, Parser parser)
     throws IOException {
     final String field = fieldname.intern();
-    final long[] fieldOrder = parser == null ? ExtendedFieldCache.EXT_DEFAULT.getLongs(reader, field) : ExtendedFieldCache.EXT_DEFAULT.getLongs(reader, field, (LongParser) parser);
+    final long[] fieldOrder = parser == null ? FieldCache.DEFAULT.getLongs(reader, field) : FieldCache.DEFAULT.getLongs(reader, field, (LongParser) parser);
     return new ScoreDocComparator() {
 
       public final int compare (final ScoreDoc i, final ScoreDoc j) {

@@ -680,7 +678,7 @@ public class QueryComponent extends SearchComponent
   static ScoreDocComparator comparatorDouble(final IndexReader reader, final String fieldname, Parser parser)
     throws IOException {
     final String field = fieldname.intern();
-    final double[] fieldOrder = parser == null ? ExtendedFieldCache.EXT_DEFAULT.getDoubles(reader, field) : ExtendedFieldCache.EXT_DEFAULT.getDoubles(reader, field, (DoubleParser) parser);
+    final double[] fieldOrder = parser == null ? FieldCache.DEFAULT.getDoubles(reader, field) : FieldCache.DEFAULT.getDoubles(reader, field, (DoubleParser) parser);
     return new ScoreDocComparator () {
 
       public final int compare (final ScoreDoc i, final ScoreDoc j) {
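The ExtendedFieldCache.EXT_DEFAULT entry points are gone in Lucene 2.9; the merged FieldCache handles longs and doubles directly. A minimal sketch; the field name "timestamp" is a hypothetical example and the caller supplies an open reader:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;

public class FieldCacheSketch {
  // Replaces ExtendedFieldCache.EXT_DEFAULT.getLongs(reader, field).
  static long[] loadTimestamps(IndexReader reader) throws IOException {
    return FieldCache.DEFAULT.getLongs(reader, "timestamp");
  }
}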
@@ -309,7 +309,7 @@ public class SimpleFacets {
     throws IOException {
 
     DocSet hasVal = searcher.getDocSet
-      (new ConstantScoreRangeQuery(fieldName, null, null, false, false));
+      (new TermRangeQuery(fieldName, null, null, false, false));
     return docs.andNotSize(hasVal);
   }
 

@@ -665,14 +665,13 @@
   }
 
   /**
-   * Macro for getting the numDocs of a ConstantScoreRangeQuery over docs
+   * Macro for getting the numDocs of a TermRangeQuery over docs
    * @see SolrIndexSearcher#numDocs
-   * @see ConstantScoreRangeQuery
+   * @see TermRangeQuery
    */
   protected int rangeCount(String field, String low, String high,
                            boolean iLow, boolean iHigh) throws IOException {
-    return searcher.numDocs(new ConstantScoreRangeQuery(field,low,high,
-                                                        iLow,iHigh),
+    return searcher.numDocs(new TermRangeQuery(field,low,high,iLow,iHigh),
                             base);
   }
 
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.search.TermRangeQuery;
 import org.apache.solr.search.function.ValueSource;
 import org.apache.solr.search.function.OrdFieldSource;
 import org.apache.solr.search.Sorting;

@@ -436,23 +436,22 @@ public abstract class FieldType extends FieldProperties {
   * handle nulls in part1 and/or part2 as well as unequal minInclusive and maxInclusive parameters gracefully.
   *
   * @param parser
-  *@param field the name of the field
+  * @param field the name of the field
   * @param part1 the lower boundary of the range, nulls are allowed.
   * @param part2 the upper boundary of the range, nulls are allowed
   * @param minInclusive whether the minimum of the range is inclusive or not
   * @param maxInclusive whether the maximum of the range is inclusive or not
-  * @return a Query instance to perform range search according to given parameters
+  * @return a Query instance to perform range search according to given parameters
   *
   * @see org.apache.solr.search.SolrQueryParser#getRangeQuery(String, String, String, boolean)
   */
  public Query getRangeQuery(QParser parser, String field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
-    RangeQuery rangeQuery = new RangeQuery(
+    // constant score mode is now enabled per default
+    return new TermRangeQuery(
            field,
            part1 == null ? null : toInternal(part1),
            part2 == null ? null : toInternal(part2),
            minInclusive, maxInclusive);
-    rangeQuery.setConstantScoreRewrite(true);
-    return rangeQuery;
  }
 
}
@ -19,9 +19,8 @@ package org.apache.solr.schema;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.trie.IntTrieRangeQuery;
|
||||
import org.apache.lucene.search.trie.LongTrieRangeQuery;
|
||||
import org.apache.lucene.search.trie.TrieUtils;
|
||||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.solr.analysis.*;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.request.TextResponseWriter;
|
||||
|
@ -33,8 +32,9 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Provides field types to support for Lucene's Trie Range Queries. See {@linkplain org.apache.lucene.search.trie
|
||||
* package description} for more details. It supports integer, float, long, double and date types.
|
||||
* Provides field types to support for Lucene's Trie Range Queries.
|
||||
* See {@link org.apache.lucene.search.NumericRangeQuery} for more details.
|
||||
* It supports integer, float, long, double and date types.
|
||||
* <p/>
|
||||
* For each number being added to this field, multiple terms are generated as per the algorithm described in the above
|
||||
 * link. The possible number of terms increases dramatically with higher precision steps (factor 2^precisionStep). For

@@ -46,7 +46,7 @@ import java.util.Map;
  * generated, range search will be no faster than any other number field, but sorting will be possible.
  *
  * @version $Id$
- * @see org.apache.lucene.search.trie.TrieUtils
+ * @see org.apache.lucene.search.NumericRangeQuery
  * @since solr 1.4
  */
 public class TrieField extends FieldType {

@@ -78,11 +78,12 @@ public class TrieField extends FieldType {
                 "Invalid type specified in schema.xml for field: " + args.get("name"), e);
       }
     }

     CharFilterFactory[] filterFactories = new CharFilterFactory[0];
     TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
-    analyzer = new TokenizerChain(filterFactories, new TrieIndexTokenizerFactory(type, precisionStep), tokenFilterFactories);
-    queryAnalyzer = new TokenizerChain(filterFactories, new TrieQueryTokenizerFactory(type), tokenFilterFactories);
+    analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories);
+    // for query time we only need one token, so we use the biggest possible precisionStep:
+    queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories);
   }

   @Override

@@ -107,12 +108,14 @@ public class TrieField extends FieldType {
   public SortField getSortField(SchemaField field, boolean top) {
     switch (type) {
       case INTEGER:
+        return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top);
       case FLOAT:
-        return TrieUtils.getIntSortField(field.getName(), top);
-      case LONG:
-      case DOUBLE:
+        return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top);
       case DATE:
-        return TrieUtils.getLongSortField(field.getName(), top);
+      case LONG:
+        return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top);
+      case DOUBLE:
+        return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top);
       default:
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
     }

@@ -121,15 +124,14 @@ public class TrieField extends FieldType {
   public ValueSource getValueSource(SchemaField field) {
     switch (type) {
       case INTEGER:
-        return new IntFieldSource(field.getName(), TrieUtils.FIELD_CACHE_INT_PARSER);
+        return new IntFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER);
       case FLOAT:
-        return new FloatFieldSource(field.getName(), TrieUtils.FIELD_CACHE_FLOAT_PARSER);
-      case LONG:
-        return new LongFieldSource(field.getName(), TrieUtils.FIELD_CACHE_LONG_PARSER);
-      case DOUBLE:
-        return new DoubleFieldSource(field.getName(), TrieUtils.FIELD_CACHE_DOUBLE_PARSER);
+        return new FloatFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER);
       case DATE:
-        return new LongFieldSource(field.getName(), TrieUtils.FIELD_CACHE_LONG_PARSER);
+      case LONG:
+        return new LongFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER);
+      case DOUBLE:
+        return new DoubleFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER);
       default:
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
     }

@@ -167,31 +169,31 @@ public class TrieField extends FieldType {
     Query query = null;
     switch (type) {
       case INTEGER:
-        query = new IntTrieRangeQuery(field, precisionStep,
+        query = NumericRangeQuery.newIntRange(field, precisionStep,
                 min == null ? null : Integer.parseInt(min),
                 max == null ? null : Integer.parseInt(max),
                 minInclusive, maxInclusive);
         break;
       case FLOAT:
-        query = new IntTrieRangeQuery(field, precisionStep,
-                min == null ? null : TrieUtils.floatToSortableInt(Float.parseFloat(min)),
-                max == null ? null : TrieUtils.floatToSortableInt(Float.parseFloat(max)),
+        query = NumericRangeQuery.newFloatRange(field, precisionStep,
+                min == null ? null : Float.parseFloat(min),
+                max == null ? null : Float.parseFloat(max),
                 minInclusive, maxInclusive);
         break;
       case LONG:
-        query = new LongTrieRangeQuery(field, precisionStep,
+        query = NumericRangeQuery.newLongRange(field, precisionStep,
                 min == null ? null : Long.parseLong(min),
                 max == null ? null : Long.parseLong(max),
                 minInclusive, maxInclusive);
         break;
       case DOUBLE:
-        query = new LongTrieRangeQuery(field, precisionStep,
-                min == null ? null : TrieUtils.doubleToSortableLong(Double.parseDouble(min)),
-                max == null ? null : TrieUtils.doubleToSortableLong(Double.parseDouble(max)),
+        query = NumericRangeQuery.newDoubleRange(field, precisionStep,
+                min == null ? null : Double.parseDouble(min),
+                max == null ? null : Double.parseDouble(max),
                 minInclusive, maxInclusive);
         break;
       case DATE:
-        query = new LongTrieRangeQuery(field, precisionStep,
+        query = NumericRangeQuery.newLongRange(field, precisionStep,
                 min == null ? null : dateField.parseMath(null, min).getTime(),
                 max == null ? null : dateField.parseMath(null, max).getTime(),
                 minInclusive, maxInclusive);

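For reference, a minimal sketch of the NumericRangeQuery calling pattern the rewritten TrieField.getRangeQuery relies on; the field name, precision step and bounds below are illustrative only and not part of this patch:

    import org.apache.lucene.search.NumericRangeQuery;
    import org.apache.lucene.search.Query;

    public class NumericRangeSketch {
      // Builds an inclusive int range query; null bounds would make the range open-ended.
      public static Query priceRange(Integer min, Integer max) {
        int precisionStep = 8; // illustrative; TrieField passes its configured precisionStep
        return NumericRangeQuery.newIntRange("price", precisionStep, min, max, true, true);
      }
    }
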
@@ -19,6 +19,7 @@ package org.apache.solr.search;

 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.OpenBitSetIterator;
+import org.apache.lucene.search.DocIdSetIterator;

 /**
  * <code>BitDocSet</code> represents an unordered set of Lucene Document Ids

@@ -84,7 +85,7 @@ public class BitDocSet extends DocSetBase {
     private final OpenBitSetIterator iter = new OpenBitSetIterator(bits);
     private int pos = iter.nextDoc();
     public boolean hasNext() {
-      return pos>=0;
+      return pos != DocIdSetIterator.NO_MORE_DOCS;
     }

     public Integer next() {

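A small sketch, assumed rather than taken from this commit, of the iteration idiom the new hasNext() check follows: nextDoc() is advanced until it returns the NO_MORE_DOCS sentinel instead of being tested for a negative id:

    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.OpenBitSet;
    import org.apache.lucene.util.OpenBitSetIterator;

    public class SentinelSketch {
      // Counts the documents in an OpenBitSet by walking its iterator to the sentinel.
      public static int count(OpenBitSet bits) {
        OpenBitSetIterator iter = new OpenBitSetIterator(bits);
        int n = 0;
        for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
          n++;
        }
        return n;
      }
    }
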
@@ -84,6 +84,10 @@ class DocSetCollector extends Collector {
   public void setNextReader(IndexReader reader, int docBase) throws IOException {
     this.base = docBase;
   }
+
+  public boolean acceptsDocsOutOfOrder() {
+    return false;
+  }
 }

 class DocSetDelegateCollector extends DocSetCollector {

@@ -136,4 +140,4 @@ class DocSetDelegateCollector extends DocSetCollector {
     collector.setNextReader(reader, docBase);
     this.base = docBase;
   }
-}
+}

@@ -317,13 +317,13 @@ public class QueryParsing {
       Term t = q.getTerm();
       FieldType ft = writeFieldName(t.field(), schema, out, flags);
       writeFieldVal(t.text(), ft, out, flags);
-    } else if (query instanceof ConstantScoreRangeQuery) {
-      ConstantScoreRangeQuery q = (ConstantScoreRangeQuery)query;
+    } else if (query instanceof TermRangeQuery) {
+      TermRangeQuery q = (TermRangeQuery)query;
       String fname = q.getField();
       FieldType ft = writeFieldName(fname, schema, out, flags);
       out.append( q.includesLower() ? '[' : '{' );
-      String lt = q.getLowerVal();
-      String ut = q.getUpperVal();
+      String lt = q.getLowerTerm();
+      String ut = q.getUpperTerm();
       if (lt==null) {
         out.append('*');
       } else {

@@ -339,17 +339,17 @@
       }

       out.append( q.includesUpper() ? ']' : '}' );
-    } else if (query instanceof RangeQuery) {
-      RangeQuery q = (RangeQuery)query;
+    } else if (query instanceof NumericRangeQuery) {
+      NumericRangeQuery q = (NumericRangeQuery)query;
       String fname = q.getField();
       FieldType ft = writeFieldName(fname, schema, out, flags);
-      out.append( q.isInclusive() ? '[' : '{' );
-      Term lt = q.getLowerTerm();
-      Term ut = q.getUpperTerm();
+      out.append( q.includesMin() ? '[' : '{' );
+      Number lt = q.getMin();
+      Number ut = q.getMax();
       if (lt==null) {
         out.append('*');
       } else {
-        writeFieldVal(lt.text(), ft, out, flags);
+        writeFieldVal(lt.toString(), ft, out, flags);
       }

       out.append(" TO ");

@@ -357,11 +357,10 @@
       if (ut==null) {
         out.append('*');
       } else {
-        writeFieldVal(ut.text(), ft, out, flags);
+        writeFieldVal(ut.toString(), ft, out, flags);
       }

-      out.append( q.isInclusive() ? ']' : '}' );
-
+      out.append( q.includesMax() ? ']' : '}' );
     } else if (query instanceof BooleanQuery) {
       BooleanQuery q = (BooleanQuery)query;
       boolean needParens=false;

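A hedged sketch, illustrative rather than part of the patch, of the accessor split the rewritten query-printing code depends on: TermRangeQuery exposes string bounds, while NumericRangeQuery exposes numeric ones:

    import org.apache.lucene.search.NumericRangeQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermRangeQuery;

    public class RangeBoundsSketch {
      // Returns "lower TO upper" using the accessors appropriate to each range query type.
      public static String bounds(Query q) {
        if (q instanceof TermRangeQuery) {
          TermRangeQuery r = (TermRangeQuery) q;
          return r.getLowerTerm() + " TO " + r.getUpperTerm();
        } else if (q instanceof NumericRangeQuery) {
          NumericRangeQuery r = (NumericRangeQuery) q;
          return r.getMin() + " TO " + r.getMax();
        }
        return q.toString();
      }
    }
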
@@ -934,6 +934,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
         }
         public void setNextReader(IndexReader reader, int docBase) throws IOException {
         }
+        public boolean acceptsDocsOutOfOrder() {
+          return true;
+        }
       };
     } else {
       collector = new Collector() {

@@ -948,6 +951,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
         }
         public void setNextReader(IndexReader reader, int docBase) throws IOException {
         }
+        public boolean acceptsDocsOutOfOrder() {
+          return true;
+        }
       };
     }

@@ -1051,6 +1057,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
         }
         public void setNextReader(IndexReader reader, int docBase) throws IOException {
         }
+        public boolean acceptsDocsOutOfOrder() {
+          return false;
+        }
       });
     }

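For context, a minimal Collector sketch (assumed, not from this patch) showing where the new acceptsDocsOutOfOrder() hook sits in the Lucene 2.9 Collector contract:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Scorer;

    public class NoopCollector extends Collector {
      public void setScorer(Scorer scorer) throws IOException {}
      public void collect(int doc) throws IOException {}
      public void setNextReader(IndexReader reader, int docBase) throws IOException {}
      // Returning true tells the searcher it may feed docs out of order,
      // which is safe here because collect() does not depend on ordering.
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }
    }
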
@@ -18,7 +18,7 @@
 package org.apache.solr.search.function;

 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.ExtendedFieldCache;
+import org.apache.lucene.search.FieldCache;

 import java.io.IOException;


@@ -31,13 +31,13 @@ import java.io.IOException;
  */

 public class DoubleFieldSource extends FieldCacheSource {
-  protected ExtendedFieldCache.DoubleParser parser;
+  protected FieldCache.DoubleParser parser;

   public DoubleFieldSource(String field) {
     this(field, null);
   }

-  public DoubleFieldSource(String field, ExtendedFieldCache.DoubleParser parser) {
+  public DoubleFieldSource(String field, FieldCache.DoubleParser parser) {
     super(field);
     this.parser = parser;
   }

@@ -48,8 +48,8 @@ public class DoubleFieldSource extends FieldCacheSource {

   public DocValues getValues(IndexReader reader) throws IOException {
     final double[] arr = (parser == null) ?
-      ((ExtendedFieldCache) cache).getDoubles(reader, field) :
-      ((ExtendedFieldCache) cache).getDoubles(reader, field, parser);
+      ((FieldCache) cache).getDoubles(reader, field) :
+      ((FieldCache) cache).getDoubles(reader, field, parser);
     return new DocValues() {
       public float floatVal(int doc) {
         return (float) arr[doc];

@@ -152,4 +152,4 @@ public class DoubleFieldSource extends FieldCacheSource {
     return h;
   }

-}
+}

@@ -18,7 +18,6 @@
 package org.apache.solr.search.function;

 import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.search.ExtendedFieldCache;

 /**
  * A base class for ValueSource implementations that retrieve values for

@@ -28,7 +27,7 @@ import org.apache.lucene.search.ExtendedFieldCache;
  */
 public abstract class FieldCacheSource extends ValueSource {
   protected String field;
-  protected FieldCache cache = ExtendedFieldCache.EXT_DEFAULT;
+  protected FieldCache cache = FieldCache.DEFAULT;

   public FieldCacheSource(String field) {
     this.field=field;

@@ -18,7 +18,7 @@
 package org.apache.solr.search.function;

 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.ExtendedFieldCache;
+import org.apache.lucene.search.FieldCache;


 import java.io.IOException;

@@ -32,13 +32,13 @@ import java.io.IOException;
  */

 public class LongFieldSource extends FieldCacheSource {
-  protected ExtendedFieldCache.LongParser parser;
+  protected FieldCache.LongParser parser;

   public LongFieldSource(String field) {
     this(field, null);
   }

-  public LongFieldSource(String field, ExtendedFieldCache.LongParser parser) {
+  public LongFieldSource(String field, FieldCache.LongParser parser) {
     super(field);
     this.parser = parser;
   }

@@ -49,8 +49,8 @@ public class LongFieldSource extends FieldCacheSource {

   public DocValues getValues(IndexReader reader) throws IOException {
     final long[] arr = (parser == null) ?
-      ((ExtendedFieldCache) cache).getLongs(reader, field) :
-      ((ExtendedFieldCache) cache).getLongs(reader, field, parser);
+      ((FieldCache) cache).getLongs(reader, field) :
+      ((FieldCache) cache).getLongs(reader, field, parser);
     return new DocValues() {
       public float floatVal(int doc) {
         return (float) arr[doc];

@@ -129,4 +129,4 @@ public class LongFieldSource extends FieldCacheSource {
     return h;
   }

-}
+}

@@ -18,7 +18,6 @@
 package org.apache.solr.search.function;

 import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.search.ExtendedFieldCache;
 import org.apache.lucene.index.IndexReader;

 import java.io.IOException;

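A brief sketch of the FieldCache entry point these value sources now use in place of ExtendedFieldCache; the "timestamp" field name is hypothetical and only for illustration:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.FieldCache;

    public class FieldCacheSketch {
      // Loads per-document long values for a hypothetical "timestamp" field,
      // decoding them with the numeric-utils-aware parser.
      public static long[] timestamps(IndexReader reader) throws IOException {
        return FieldCache.DEFAULT.getLongs(reader, "timestamp", FieldCache.NUMERIC_UTILS_LONG_PARSER);
      }
    }
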
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.analysis;
-
-import java.io.StringReader;
-
-import junit.framework.TestCase;
-
-public class TestCharFilter extends TestCase {
-
-  public void testCharFilter1() throws Exception {
-    CharStream cs = new CharFilter1( CharReader.get( new StringReader("") ) );
-    assertEquals( "corrected offset is invalid", 1, cs.correctOffset( 0 ) );
-  }
-
-  public void testCharFilter2() throws Exception {
-    CharStream cs = new CharFilter2( CharReader.get( new StringReader("") ) );
-    assertEquals( "corrected offset is invalid", 2, cs.correctOffset( 0 ) );
-  }
-
-  public void testCharFilter12() throws Exception {
-    CharStream cs = new CharFilter2( new CharFilter1( CharReader.get( new StringReader("") ) ) );
-    assertEquals( "corrected offset is invalid", 3, cs.correctOffset( 0 ) );
-  }
-
-  public void testCharFilter11() throws Exception {
-    CharStream cs = new CharFilter1( new CharFilter1( CharReader.get( new StringReader("") ) ) );
-    assertEquals( "corrected offset is invalid", 2, cs.correctOffset( 0 ) );
-  }
-
-  static class CharFilter1 extends CharFilter {
-
-    protected CharFilter1(CharStream in) {
-      super(in);
-    }
-
-    @Override
-    protected int correct(int currentOff) {
-      return currentOff + 1;
-    }
-  }
-
-  static class CharFilter2 extends CharFilter {
-
-    protected CharFilter2(CharStream in) {
-      super(in);
-    }
-
-    @Override
-    protected int correct(int currentOff) {
-      return currentOff + 2;
-    }
-  }
-}

@@ -1,176 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.analysis;
-
-import java.io.StringReader;
-import java.util.List;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-
-public class TestMappingCharFilter extends BaseTokenTestCase {
-
-  NormalizeMap normMap;
-
-  public void setUp() throws Exception {
-    normMap = new NormalizeMap();
-
-    normMap.add( "aa", "a" );
-    normMap.add( "bbb", "b" );
-    normMap.add( "cccc", "cc" );
-
-    normMap.add( "h", "i" );
-    normMap.add( "j", "jj" );
-    normMap.add( "k", "kkk" );
-    normMap.add( "ll", "llll" );
-
-    normMap.add( "empty", "" );
-  }
-
-  public void testReaderReset() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
-    char[] buf = new char[10];
-    int len = cs.read(buf, 0, 10);
-    assertEquals( 1, len );
-    assertEquals( 'x', buf[0]) ;
-    len = cs.read(buf, 0, 10);
-    assertEquals( -1, len );
-
-    // rewind
-    cs.reset();
-    len = cs.read(buf, 0, 10);
-    assertEquals( 1, len );
-    assertEquals( 'x', buf[0]) ;
-  }
-
-  public void testNothingChange() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "x" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test1to1() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "i" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test1to2() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "j" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "jj,1,0,1" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test1to3() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "k" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "kkk,1,0,1" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test2to4() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "ll" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "llll,1,0,2" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test2to1() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "aa" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "a,1,0,2" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test3to1() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "bbb" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "b,1,0,3" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test4to2() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "cccc" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "cc,1,0,4" );
-    assertTokEqualOff( expect, real );
-  }
-
-  public void test5to0() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "empty" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    assertEquals( 0, real.size() );
-  }
-
-  //
-  //                1111111111222
-  //      01234567890123456789012
-  //(in)  h i j k ll cccc bbb aa
-  //
-  //                1111111111222
-  //      01234567890123456789012
-  //(out) i i jj kkk llll cc b a
-  //
-  //    h, 0, 1 =>    i, 0, 1
-  //    i, 2, 3 =>    i, 2, 3
-  //    j, 4, 5 =>   jj, 4, 5
-  //    k, 6, 7 =>  kkk, 6, 7
-  //   ll, 8,10 => llll, 8,10
-  // cccc,11,15 =>   cc,11,15
-  //  bbb,16,19 =>    b,16,19
-  //   aa,20,22 =>    a,20,22
-  //
-  public void testTokenStream() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "i,1,0,1 i,1,2,3 jj,1,4,5 kkk,1,6,7 llll,1,8,10 cc,1,11,15 b,1,16,19 a,1,20,22" );
-    assertTokEqualOff( expect, real );
-  }
-
-  //
-  //
-  //        0123456789
-  //(in)    aaaa ll h
-  //(out-1) aa llll i
-  //(out-2) a llllllll i
-  //
-  // aaaa,0,4 => a,0,4
-  //   ll,5,7 => llllllll,5,7
-  //    h,8,9 => i,8,9
-  public void testChained() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap,
-        new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
-    TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );
-    List<Token> real = getTokens( ts );
-    List<Token> expect = tokens( "a,1,0,4 llllllll,1,5,7 i,1,8,9" );
-    assertTokEqualOff( expect, real );
-  }
-}

@@ -23,6 +23,7 @@ import java.util.Random;
 import java.util.BitSet;

 import org.apache.lucene.util.OpenBitSetIterator;
+import org.apache.lucene.search.DocIdSetIterator;

 /**
  * @deprecated

@@ -62,7 +63,7 @@ public class TestOpenBitSet extends TestCase {
       iterator.skipTo(bb+1);
       bb = iterator.doc();
     }
-    assertEquals(aa,bb);
+    assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
   } while (aa>=0);
 }

@@ -19,8 +19,8 @@
                 org.apache.lucene.analysis.Token,
                 org.apache.lucene.analysis.TokenStream,
                 org.apache.lucene.index.Payload,
-                org.apache.solr.analysis.CharReader,
-                org.apache.solr.analysis.CharStream,
+                org.apache.lucene.analysis.CharReader,
+                org.apache.lucene.analysis.CharStream,
                 org.apache.solr.analysis.CharFilterFactory,
                 org.apache.solr.analysis.TokenFilterFactory,
                 org.apache.solr.analysis.TokenizerChain,