mirror of https://github.com/apache/lucene.git
Set eol-style to native for all files in src/java and src/test that did not have this property set before.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@658136 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
90be0daa46
commit
4eb8692588
src/java/org/apache/lucene/analysis/CachingTokenFilter.java
@@ -1,72 +1,72 @@
package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
 * This class can be used if the Tokens of a TokenStream
 * are intended to be consumed more than once. It caches
 * all Tokens locally in a List.
 *
 * CachingTokenFilter implements the optional method
 * {@link TokenStream#reset()}, which repositions the
 * stream to the first Token.
 */
public class CachingTokenFilter extends TokenFilter {
  private List cache;
  private Iterator iterator;

  public CachingTokenFilter(TokenStream input) {
    super(input);
  }

  public Token next() throws IOException {
    if (cache == null) {
      // fill cache lazily
      cache = new LinkedList();
      fillCache();
      iterator = cache.iterator();
    }

    if (!iterator.hasNext()) {
      // the cache is exhausted, return null
      return null;
    }

    return (Token) iterator.next();
  }

  public void reset() throws IOException {
    if (cache != null) {
      iterator = cache.iterator();
    }
  }

  private void fillCache() throws IOException {
    Token token;
    while ((token = input.next()) != null) {
      cache.add(token);
    }
  }
}
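For illustration, a minimal sketch of the two-pass consumption this filter enables, assuming the 2.x-era TokenStream API above (a Token-returning next()) and the WhitespaceTokenizer from this package; the demo class name and input text are made up:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class CachingTokenFilterDemo {
  public static void main(String[] args) throws IOException {
    TokenStream stream = new WhitespaceTokenizer(new StringReader("one two three"));
    CachingTokenFilter cached = new CachingTokenFilter(stream);

    // First pass pulls Tokens from the underlying stream and fills the cache.
    for (Token t = cached.next(); t != null; t = cached.next()) {
      System.out.println("pass 1: " + t.termText());
    }

    // reset() repositions to the first cached Token; the underlying
    // stream is not consumed a second time.
    cached.reset();
    for (Token t = cached.next(); t != null; t = cached.next()) {
      System.out.println("pass 2: " + t.termText());
    }
  }
}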
src/java/org/apache/lucene/document/AbstractField.java
@@ -1,274 +1,274 @@
package org.apache.lucene.document;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Base class for {@link Fieldable} implementations, holding the common
 * flags and field data.
 **/
public abstract class AbstractField implements Fieldable {

  protected String name = "body";
  protected boolean storeTermVector = false;
  protected boolean storeOffsetWithTermVector = false;
  protected boolean storePositionWithTermVector = false;
  protected boolean omitNorms = false;
  protected boolean isStored = false;
  protected boolean isIndexed = true;
  protected boolean isTokenized = true;
  protected boolean isBinary = false;
  protected boolean isCompressed = false;
  protected boolean lazy = false;
  protected float boost = 1.0f;
  // the one and only data object for all the different kinds of field values
  protected Object fieldsData = null;

  protected AbstractField() {
  }

  protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
    if (name == null)
      throw new NullPointerException("name cannot be null");
    this.name = name.intern();        // field names are interned

    if (store == Field.Store.YES) {
      this.isStored = true;
      this.isCompressed = false;
    }
    else if (store == Field.Store.COMPRESS) {
      this.isStored = true;
      this.isCompressed = true;
    }
    else if (store == Field.Store.NO) {
      this.isStored = false;
      this.isCompressed = false;
    }
    else
      throw new IllegalArgumentException("unknown store parameter " + store);

    if (index == Field.Index.NO) {
      this.isIndexed = false;
      this.isTokenized = false;
    } else if (index == Field.Index.TOKENIZED) {
      this.isIndexed = true;
      this.isTokenized = true;
    } else if (index == Field.Index.UN_TOKENIZED) {
      this.isIndexed = true;
      this.isTokenized = false;
    } else if (index == Field.Index.NO_NORMS) {
      this.isIndexed = true;
      this.isTokenized = false;
      this.omitNorms = true;
    } else {
      throw new IllegalArgumentException("unknown index parameter " + index);
    }

    this.isBinary = false;

    setStoreTermVector(termVector);
  }

  /** Sets the boost factor for hits on this field. This value will be
   * multiplied into the score of all hits on this field of this
   * document.
   *
   * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
   * containing this field. If a document has multiple fields with the same
   * name, all such values are multiplied together. This product is then
   * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
   * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
   * index. One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
   * @see org.apache.lucene.document.Document#setBoost(float)
   * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
   */
  public void setBoost(float boost) {
    this.boost = boost;
  }

  /** Returns the boost factor for hits on this field.
   *
   * <p>The default value is 1.0.
   *
   * <p>Note: this value is not stored directly with the document in the index.
   * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
   * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
   * this field was indexed.
   *
   * @see #setBoost(float)
   */
  public float getBoost() {
    return boost;
  }

  /** Returns the name of the field as an interned string.
   * For example "date", "title", "body", ...
   */
  public String name() { return name; }

  protected void setStoreTermVector(Field.TermVector termVector) {
    if (termVector == Field.TermVector.NO) {
      this.storeTermVector = false;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == Field.TermVector.YES) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == Field.TermVector.WITH_POSITIONS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == Field.TermVector.WITH_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = true;
    }
    else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = true;
    }
    else {
      throw new IllegalArgumentException("unknown termVector parameter " + termVector);
    }
  }

  /** True iff the value of the field is to be stored in the index for return
   * with search hits. It is an error for this to be true if a field is
   * Reader-valued. */
  public final boolean isStored() { return isStored; }

  /** True iff the value of the field is to be indexed, so that it may be
   * searched on. */
  public final boolean isIndexed() { return isIndexed; }

  /** True iff the value of the field should be tokenized as text prior to
   * indexing. Un-tokenized fields are indexed as a single word and may not be
   * Reader-valued. */
  public final boolean isTokenized() { return isTokenized; }

  /** True if the value of the field is stored and compressed within the index */
  public final boolean isCompressed() { return isCompressed; }

  /** True iff the term or terms used to index this field are stored as a term
   * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
   * These methods do not provide access to the original content of the field,
   * only to terms used to index it. If the original content must be
   * preserved, use the <code>stored</code> attribute instead.
   *
   * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
   */
  public final boolean isTermVectorStored() { return storeTermVector; }

  /**
   * True iff terms are stored as term vector together with their offsets
   * (start and end position in source text).
   */
  public boolean isStoreOffsetWithTermVector() {
    return storeOffsetWithTermVector;
  }

  /**
   * True iff terms are stored as term vector together with their token positions.
   */
  public boolean isStorePositionWithTermVector() {
    return storePositionWithTermVector;
  }

  /** True iff the value of the field is stored as binary */
  public final boolean isBinary() { return isBinary; }

  /** True if norms are omitted for this indexed field */
  public boolean getOmitNorms() { return omitNorms; }

  /** Expert:
   *
   * If set, omit normalization factors associated with this indexed field.
   * This effectively disables indexing boosts and length normalization for this field.
   */
  public void setOmitNorms(boolean omitNorms) { this.omitNorms = omitNorms; }

  public boolean isLazy() {
    return lazy;
  }

  /** Prints a Field for human consumption. */
  public final String toString() {
    StringBuffer result = new StringBuffer();
    if (isStored) {
      result.append("stored");
      if (isCompressed)
        result.append("/compressed");
      else
        result.append("/uncompressed");
    }
    if (isIndexed) {
      if (result.length() > 0)
        result.append(",");
      result.append("indexed");
    }
    if (isTokenized) {
      if (result.length() > 0)
        result.append(",");
      result.append("tokenized");
    }
    if (storeTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVector");
    }
    if (storeOffsetWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorOffsets");
    }
    if (storePositionWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorPosition");
    }
    if (isBinary) {
      if (result.length() > 0)
        result.append(",");
      result.append("binary");
    }
    if (omitNorms) {
      result.append(",omitNorms");
    }
    if (lazy) {
      result.append(",lazy");
    }
    result.append('<');
    result.append(name);
    result.append(':');

    if (fieldsData != null && !lazy) {
      result.append(fieldsData);
    }

    result.append('>');
    return result.toString();
  }
}
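As a sketch of the flags this base class manages, the snippet below constructs a concrete Field through the 2.x-era five-argument constructor and prints it; the field name and value are made up, and the comment shows the output one would expect from the toString() defined above:

import org.apache.lucene.document.Field;

public class FieldFlagsDemo {
  public static void main(String[] args) {
    // Stored, tokenized, and with positions and offsets in the term vector.
    Field f = new Field("title", "a sample value",
        Field.Store.YES, Field.Index.TOKENIZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS);

    // toString() (defined in AbstractField) renders the flags in order, e.g.:
    // stored/uncompressed,indexed,tokenized,termVector,termVectorOffsets,termVectorPosition<title:a sample value>
    System.out.println(f);
  }
}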
src/java/org/apache/lucene/document/FieldSelector.java
@@ -1,34 +1,34 @@
package org.apache.lucene.document;

import java.io.Serializable;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
 * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}.
 **/
public interface FieldSelector extends Serializable {

  /**
   * @param fieldName the field to accept or reject
   * @return an instance of {@link FieldSelectorResult} indicating whether
   * the {@link Field} named <code>fieldName</code> should be loaded.
   */
  FieldSelectorResult accept(String fieldName);
}
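A minimal sketch of a custom FieldSelector, assuming the typical use with the IndexReader#document(int, FieldSelector) overload referenced in the javadoc; the selector class name and the "title" field name are made up:

import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;

// Loads only the "title" field eagerly and skips every other stored field.
public class TitleOnlySelector implements FieldSelector {
  public FieldSelectorResult accept(String fieldName) {
    return "title".equals(fieldName)
        ? FieldSelectorResult.LOAD
        : FieldSelectorResult.NO_LOAD;
  }
}

It would then be passed as, for example, reader.document(docId, new TitleOnlySelector()).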
src/java/org/apache/lucene/document/FieldSelectorResult.java
@@ -1,96 +1,96 @@
package org.apache.lucene.document;

import java.io.Serializable;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Provides information about what should be done with this Field
 **/
// Replace with an enumerated type in 1.5
public final class FieldSelectorResult implements Serializable {

  /**
   * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered.
   * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
   * <p/>
   * {@link Document#add(Fieldable)} should be called by the Reader.
   */
  public transient static final FieldSelectorResult LOAD = new FieldSelectorResult(0);

  /**
   * Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until
   * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
   * return a valid instance of a {@link Fieldable}.
   * <p/>
   * {@link Document#add(Fieldable)} should be called by the Reader.
   */
  public transient static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);

  /**
   * Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
   * {@link Document#add(Fieldable)} is not called.
   * <p/>
   * {@link Document#add(Fieldable)} should not be called by the Reader.
   */
  public transient static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);

  /**
   * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the
   * Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
   * both be valid for this {@link Field}.
   * <p/>
   * {@link Document#add(Fieldable)} should be called by the Reader.
   */
  public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);

  /**
   * Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes.
   * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
   * <p/>
   * {@link Document#add(Fieldable)} should be called by the Reader.
   */
  public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);

  /** Expert: Load the size of this {@link Field} rather than its value.
   * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
   * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0].
   */
  public transient static final FieldSelectorResult SIZE = new FieldSelectorResult(5);

  /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */
  public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);

  private int id;

  private FieldSelectorResult(int id) {
    this.id = id;
  }

  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;

    final FieldSelectorResult that = (FieldSelectorResult) o;

    if (id != that.id) return false;

    return true;
  }

  public int hashCode() {
    return id;
  }
}
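The SIZE javadoc above pins down the encoding: an int stored big-endian in a byte[] ("higher order byte first in [0]"). A small sketch of a decoder for such a value; the class and method names are ours, not part of the API, and the bytes are assumed to come from the field's binaryValue():

import org.apache.lucene.document.Fieldable;

public class FieldSizeDecoder {
  // Decodes the value of a field loaded with FieldSelectorResult.SIZE:
  // a 4-byte big-endian int, per the javadoc above.
  public static int decodeSize(Fieldable sizeField) {
    byte[] b = sizeField.binaryValue();
    return ((b[0] & 0xFF) << 24)
         | ((b[1] & 0xFF) << 16)
         | ((b[2] & 0xFF) << 8)
         |  (b[3] & 0xFF);
  }
}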
src/java/org/apache/lucene/document/Fieldable.java
@@ -1,144 +1,144 @@
package org.apache.lucene.document;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;
import java.io.Serializable;

import org.apache.lucene.analysis.TokenStream;

/**
 * Synonymous with {@link Field}.
 **/
public interface Fieldable extends Serializable {
  /** Sets the boost factor for hits on this field. This value will be
   * multiplied into the score of all hits on this field of this
   * document.
   *
   * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
   * containing this field. If a document has multiple fields with the same
   * name, all such values are multiplied together. This product is then
   * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
   * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
   * index. One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
   * @see org.apache.lucene.document.Document#setBoost(float)
   * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
   */
  void setBoost(float boost);

  /** Returns the boost factor for hits on this field.
   *
   * <p>The default value is 1.0.
   *
   * <p>Note: this value is not stored directly with the document in the index.
   * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
   * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
   * this field was indexed.
   *
   * @see #setBoost(float)
   */
  float getBoost();

  /** Returns the name of the field as an interned string.
   * For example "date", "title", "body", ...
   */
  String name();

  /** The value of the field as a String, or null. If null, the Reader value,
   * binary value, or TokenStream value is used. Exactly one of stringValue(),
   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
  String stringValue();

  /** The value of the field as a Reader, or null. If null, the String value,
   * binary value, or TokenStream value is used. Exactly one of stringValue(),
   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
  Reader readerValue();

  /** The value of the field in binary, or null. If null, the Reader value,
   * String value, or TokenStream value is used. Exactly one of stringValue(),
   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
  byte[] binaryValue();

  /** The value of the field as a TokenStream, or null. If null, the Reader value,
   * String value, or binary value is used. Exactly one of stringValue(),
   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
  TokenStream tokenStreamValue();

  /** True iff the value of the field is to be stored in the index for return
   * with search hits. It is an error for this to be true if a field is
   * Reader-valued. */
  boolean isStored();

  /** True iff the value of the field is to be indexed, so that it may be
   * searched on. */
  boolean isIndexed();

  /** True iff the value of the field should be tokenized as text prior to
   * indexing. Un-tokenized fields are indexed as a single word and may not be
   * Reader-valued. */
  boolean isTokenized();

  /** True if the value of the field is stored and compressed within the index */
  boolean isCompressed();

  /** True iff the term or terms used to index this field are stored as a term
   * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
   * These methods do not provide access to the original content of the field,
   * only to terms used to index it. If the original content must be
   * preserved, use the <code>stored</code> attribute instead.
   *
   * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
   */
  boolean isTermVectorStored();

  /**
   * True iff terms are stored as term vector together with their offsets
   * (start and end position in source text).
   */
  boolean isStoreOffsetWithTermVector();

  /**
   * True iff terms are stored as term vector together with their token positions.
   */
  boolean isStorePositionWithTermVector();

  /** True iff the value of the field is stored as binary */
  boolean isBinary();

  /** True if norms are omitted for this indexed field */
  boolean getOmitNorms();

  /** Expert:
   *
   * If set, omit normalization factors associated with this indexed field.
   * This effectively disables indexing boosts and length normalization for this field.
   */
  void setOmitNorms(boolean omitNorms);

  /**
   * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
   * its values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
   * retrieved the {@link Document} is still open.
   *
   * @return true if this field can be loaded lazily
   */
  boolean isLazy();
}
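Because the four value accessors are mutually exclusive per the javadoc ("exactly one ... must be set"), a consumer can probe them in turn. A small illustrative helper; the class and method names are ours:

import org.apache.lucene.document.Fieldable;

public class FieldableValues {
  // Probes the four mutually exclusive value accessors in order and
  // reports which kind of value this Fieldable carries.
  public static String valueKind(Fieldable f) {
    if (f.stringValue() != null) return "String";
    if (f.readerValue() != null) return "Reader";
    if (f.binaryValue() != null) return "binary";
    if (f.tokenStreamValue() != null) return "TokenStream";
    return "none";
  }
}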
src/java/org/apache/lucene/document/LoadFirstFieldSelector.java
@@ -1,29 +1,29 @@
package org.apache.lucene.document;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Load the first field and break.
 * <p/>
 * See {@link FieldSelectorResult#LOAD_AND_BREAK}
 */
public class LoadFirstFieldSelector implements FieldSelector {

  public FieldSelectorResult accept(String fieldName) {
    return FieldSelectorResult.LOAD_AND_BREAK;
  }
}
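A usage sketch, assuming the 2.x-era IndexReader#open and the document(int, FieldSelector) overload; the demo class name and index path are placeholders:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.index.IndexReader;

public class LoadFirstDemo {
  public static void main(String[] args) throws Exception {
    // "/path/to/index" is a placeholder; point this at a real index directory.
    IndexReader reader = IndexReader.open("/path/to/index");
    try {
      // Only the first stored field of doc 0 is read; loading of the
      // remaining stored fields is broken off immediately (LOAD_AND_BREAK).
      Document doc = reader.document(0, new LoadFirstFieldSelector());
      System.out.println(doc);
    } finally {
      reader.close();
    }
  }
}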
src/java/org/apache/lucene/document/SetBasedFieldSelector.java
@@ -1,60 +1,60 @@
package org.apache.lucene.document;

import java.util.Set;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Declare what fields to load normally and what fields to load lazily
 **/
public class SetBasedFieldSelector implements FieldSelector {

  private Set fieldsToLoad;
  private Set lazyFieldsToLoad;

  /**
   * Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. If both are empty, the
   * Document will not have any {@link Field} on it.
   * @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null
   * @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. May be empty, but not null
   */
  public SetBasedFieldSelector(Set fieldsToLoad, Set lazyFieldsToLoad) {
    this.fieldsToLoad = fieldsToLoad;
    this.lazyFieldsToLoad = lazyFieldsToLoad;
  }

  /**
   * Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in either of the
   * initializing Sets, then {@link org.apache.lucene.document.FieldSelectorResult#NO_LOAD} is returned. If a Field name
   * is in both <code>fieldsToLoad</code> and <code>lazyFieldsToLoad</code>, lazy has precedence.
   *
   * @param fieldName The {@link Field} name to check
   * @return The {@link FieldSelectorResult}
   */
  public FieldSelectorResult accept(String fieldName) {
    FieldSelectorResult result = FieldSelectorResult.NO_LOAD;
    if (fieldsToLoad.contains(fieldName)) {
      result = FieldSelectorResult.LOAD;
    }
    if (lazyFieldsToLoad.contains(fieldName)) {
      result = FieldSelectorResult.LAZY_LOAD;
    }
    return result;
  }
}
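A usage sketch; the raw Sets match this pre-generics code, and the index path, field names, and demo class name are placeholders:

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;

public class SetBasedSelectorDemo {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/path/to/index");
    try {
      Set eager = new HashSet();
      eager.add("title");          // load normally
      Set lazy = new HashSet();
      lazy.add("body");            // defer until the value is requested

      Document doc = reader.document(0, new SetBasedFieldSelector(eager, lazy));
      Fieldable body = doc.getFieldable("body");
      // Per the Fieldable javadoc, read a lazy value only while the
      // IndexReader that produced the Document is still open.
      System.out.println(body.stringValue());
    } finally {
      reader.close();
    }
  }
}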
@@ -1,114 +1,114 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.IndexInput;

/**
 * Implements the skip list reader for the default posting list format
 * that stores positions and payloads.
 */
class DefaultSkipListReader extends MultiLevelSkipListReader {
  private boolean currentFieldStoresPayloads;
  private long freqPointer[];
  private long proxPointer[];
  private int payloadLength[];

  private long lastFreqPointer;
  private long lastProxPointer;
  private int lastPayloadLength;

  DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
    super(skipStream, maxSkipLevels, skipInterval);
    freqPointer = new long[maxSkipLevels];
    proxPointer = new long[maxSkipLevels];
    payloadLength = new int[maxSkipLevels];
  }

  void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads) {
    super.init(skipPointer, df);
    this.currentFieldStoresPayloads = storesPayloads;
    lastFreqPointer = freqBasePointer;
    lastProxPointer = proxBasePointer;

    Arrays.fill(freqPointer, freqBasePointer);
    Arrays.fill(proxPointer, proxBasePointer);
    Arrays.fill(payloadLength, 0);
  }

  /** Returns the freq pointer of the doc to which the last call of
   *  {@link MultiLevelSkipListReader#skipTo(int)} has skipped.  */
  long getFreqPointer() {
    return lastFreqPointer;
  }

  /** Returns the prox pointer of the doc to which the last call of
   *  {@link MultiLevelSkipListReader#skipTo(int)} has skipped.  */
  long getProxPointer() {
    return lastProxPointer;
  }

  /** Returns the payload length of the payload stored just before
   *  the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)}
   *  has skipped.  */
  int getPayloadLength() {
    return lastPayloadLength;
  }

  protected void seekChild(int level) throws IOException {
    super.seekChild(level);
    freqPointer[level] = lastFreqPointer;
    proxPointer[level] = lastProxPointer;
    payloadLength[level] = lastPayloadLength;
  }

  protected void setLastSkipData(int level) {
    super.setLastSkipData(level);
    lastFreqPointer = freqPointer[level];
    lastProxPointer = proxPointer[level];
    lastPayloadLength = payloadLength[level];
  }

  protected int readSkipData(int level, IndexInput skipStream) throws IOException {
    int delta;
    if (currentFieldStoresPayloads) {
      // the current field stores payloads.
      // if the doc delta is odd then we have
      // to read the current payload length
      // because it differs from the length of the
      // previous payload
      delta = skipStream.readVInt();
      if ((delta & 1) != 0) {
        payloadLength[level] = skipStream.readVInt();
      }
      delta >>>= 1;
    } else {
      delta = skipStream.readVInt();
    }
    freqPointer[level] += skipStream.readVInt();
    proxPointer[level] += skipStream.readVInt();

    return delta;
  }
}
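To make the odd/even encoding in readSkipData concrete, here is a small self-contained sketch, not part of this commit. It mirrors the writer's encoding and the reader's decoding of one skip entry with made-up values; plain ints stand in for the VInts on disk.

public class SkipDeltaEncodingDemo {
  public static void main(String[] args) {
    int docDelta = 8;
    // Case A: payload length unchanged since the last skip point -> even value, no length follows
    int encodedSame = docDelta * 2;            // 16
    // Case B: payload length changed to 5 -> odd value, the new length (5) follows as its own VInt
    int encodedChanged = docDelta * 2 + 1;     // 17

    // Decoding, exactly as in DefaultSkipListReader.readSkipData:
    int delta = encodedChanged;
    boolean lengthFollows = (delta & 1) != 0;  // true for 17
    delta >>>= 1;                              // back to the doc delta, 8
    System.out.println("doc delta = " + delta + ", payload length follows = " + lengthFollows);
    System.out.println("unchanged-length encoding would have been " + encodedSame);
  }
}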
@@ -1,124 +1,124 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.IndexOutput;


/**
 * Implements the skip list writer for the default posting list format
 * that stores positions and payloads.
 */
class DefaultSkipListWriter extends MultiLevelSkipListWriter {
  private int[] lastSkipDoc;
  private int[] lastSkipPayloadLength;
  private long[] lastSkipFreqPointer;
  private long[] lastSkipProxPointer;

  private IndexOutput freqOutput;
  private IndexOutput proxOutput;

  private int curDoc;
  private boolean curStorePayloads;
  private int curPayloadLength;
  private long curFreqPointer;
  private long curProxPointer;

  DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) {
    super(skipInterval, numberOfSkipLevels, docCount);
    this.freqOutput = freqOutput;
    this.proxOutput = proxOutput;

    lastSkipDoc = new int[numberOfSkipLevels];
    lastSkipPayloadLength = new int[numberOfSkipLevels];
    lastSkipFreqPointer = new long[numberOfSkipLevels];
    lastSkipProxPointer = new long[numberOfSkipLevels];
  }

  /**
   * Sets the values for the current skip data.
   */
  void setSkipData(int doc, boolean storePayloads, int payloadLength) {
    this.curDoc = doc;
    this.curStorePayloads = storePayloads;
    this.curPayloadLength = payloadLength;
    this.curFreqPointer = freqOutput.getFilePointer();
    this.curProxPointer = proxOutput.getFilePointer();
  }

  protected void resetSkip() {
    super.resetSkip();
    Arrays.fill(lastSkipDoc, 0);
    Arrays.fill(lastSkipPayloadLength, -1);  // we don't have to write the first length in the skip list
    Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer());
    Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer());
  }

  protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
    // To efficiently store payloads in the posting lists we do not store the length of
    // every payload. Instead we omit the length for a payload if the previous payload had
    // the same length.
    // However, in order to support skipping, the payload length at every skip point must be known.
    // So we use the same length encoding that we use for the posting lists for the skip data as well:
    // Case 1: current field does not store payloads
    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           DocSkip records the document number before every skipInterval-th document in TermFreqs.
    //           Document numbers are represented as differences from the previous value in the sequence.
    // Case 2: current field stores payloads
    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           PayloadLength             --> VInt
    //         In this case DocSkip/2 is the difference between
    //         the current and the previous value. If DocSkip
    //         is odd, then a PayloadLength encoded as VInt follows,
    //         if DocSkip is even, then it is assumed that the
    //         current payload length equals the length at the previous
    //         skip point
    if (curStorePayloads) {
      int delta = curDoc - lastSkipDoc[level];
      if (curPayloadLength == lastSkipPayloadLength[level]) {
        // the current payload length equals the length at the previous skip point,
        // so we don't store the length again
        skipBuffer.writeVInt(delta * 2);
      } else {
        // the payload length is different from the previous one. We shift the DocSkip,
        // set the lowest bit and store the current payload length as VInt.
        skipBuffer.writeVInt(delta * 2 + 1);
        skipBuffer.writeVInt(curPayloadLength);
        lastSkipPayloadLength[level] = curPayloadLength;
      }
    } else {
      // current field does not store payloads
      skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
    }
    skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
    skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level]));

    lastSkipDoc[level] = curDoc;
    //System.out.println("write doc at level " + level + ": " + curDoc);

    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
  }

}
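A hedged sketch, not part of this commit, of how indexing code can drive this writer while flushing a term's postings. The file names, doc ids and the posting loop are made up, and the real indexer (DocumentsWriter/SegmentMerger) does more bookkeeping; also, since the class is package-private, such a driver would have to live in org.apache.lucene.index.

package org.apache.lucene.index; // required: DefaultSkipListWriter is package-private

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

class SkipWriterSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexOutput freqOut = dir.createOutput("_demo.frq"); // hypothetical file names
    IndexOutput proxOut = dir.createOutput("_demo.prx");

    int skipInterval = 16;
    DefaultSkipListWriter skipWriter =
        new DefaultSkipListWriter(skipInterval, 10, 10000, freqOut, proxOut);

    skipWriter.resetSkip();
    for (int df = 1; df <= 10000; df++) {
      int doc = df * 3; // made-up monotonically increasing doc ids
      if ((df % skipInterval) == 0) {
        // record the state *before* writing the skipInterval-th posting
        skipWriter.setSkipData(doc, false, -1);
        skipWriter.bufferSkip(df);
      }
      freqOut.writeVInt(1); // stand-in for the real posting data
    }
    long skipPointer = skipWriter.writeSkip(freqOut); // append skip levels, remember where they start
    freqOut.close();
    proxOut.close();
    System.out.println("skip data starts at " + skipPointer);
  }
}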
@@ -1,79 +1,79 @@
package org.apache.lucene.index;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *
 *
 **/
public class FieldReaderException extends RuntimeException {
  /**
   * Constructs a new runtime exception with <code>null</code> as its
   * detail message.  The cause is not initialized, and may subsequently be
   * initialized by a call to {@link #initCause}.
   */
  public FieldReaderException() {
  }

  /**
   * Constructs a new runtime exception with the specified cause and a
   * detail message of <tt>(cause==null ? null : cause.toString())</tt>
   * (which typically contains the class and detail message of
   * <tt>cause</tt>).
   * <p>
   * This constructor is useful for runtime exceptions
   * that are little more than wrappers for other throwables.
   *
   * @param cause the cause (which is saved for later retrieval by the
   *              {@link #getCause()} method).  (A <tt>null</tt> value is
   *              permitted, and indicates that the cause is nonexistent or
   *              unknown.)
   * @since 1.4
   */
  public FieldReaderException(Throwable cause) {
    super(cause);
  }

  /**
   * Constructs a new runtime exception with the specified detail message.
   * The cause is not initialized, and may subsequently be initialized by a
   * call to {@link #initCause}.
   *
   * @param message the detail message. The detail message is saved for
   *                later retrieval by the {@link #getMessage()} method.
   */
  public FieldReaderException(String message) {
    super(message);
  }

  /**
   * Constructs a new runtime exception with the specified detail message and
   * cause.  <p>Note that the detail message associated with
   * <code>cause</code> is <i>not</i> automatically incorporated in
   * this runtime exception's detail message.
   *
   * @param message the detail message (which is saved for later retrieval
   *                by the {@link #getMessage()} method).
   * @param cause   the cause (which is saved for later retrieval by the
   *                {@link #getCause()} method).  (A <tt>null</tt> value is
   *                permitted, and indicates that the cause is nonexistent or
   *                unknown.)
   * @since 1.4
   */
  public FieldReaderException(String message, Throwable cause) {
    super(message, cause);
  }
}
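Since the class javadoc above is empty, a hedged sketch of the intended use: wrapping a checked, lower-level failure while reading a field value, so callers of an unchecked accessor still see the root cause. The simulated read is a made-up stand-in for a real fields-stream read.

import java.io.IOException;

import org.apache.lucene.index.FieldReaderException;

public class FieldReaderExceptionDemo {
  static String readFieldValue() {
    try {
      throw new IOException("simulated read failure"); // stand-in for a real stored-fields read
    } catch (IOException e) {
      // surface the checked IOException as the unchecked FieldReaderException
      throw new FieldReaderException(e);
    }
  }

  public static void main(String[] args) {
    try {
      readFieldValue();
    } catch (FieldReaderException e) {
      System.out.println("caught, root cause: " + e.getCause());
    }
  }
}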
@@ -1,273 +1,273 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IndexInput;

/**
 * This abstract class reads skip lists with multiple levels.
 *
 * See {@link MultiLevelSkipListWriter} for the information about the encoding
 * of the multi level skip lists.
 *
 * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
 * which defines the actual format of the skip data.
 */
abstract class MultiLevelSkipListReader {
  // the maximum number of skip levels possible for this index
  private int maxNumberOfSkipLevels;

  // number of levels in this skip list
  private int numberOfSkipLevels;

  // Expert: defines the number of top skip levels to buffer in memory.
  // Reducing this number results in less memory usage, but possibly
  // slower performance due to more random I/Os.
  // Please notice that the space each level occupies is limited by
  // the skipInterval. The top level can not contain more than
  // skipLevel entries, the second top level can not contain more
  // than skipLevel^2 entries and so forth.
  private int numberOfLevelsToBuffer = 1;

  private int docCount;
  private boolean haveSkipped;

  private IndexInput[] skipStream;    // skipStream for each level
  private long skipPointer[];         // the start pointer of each skip level
  private int skipInterval[];         // skipInterval of each level
  private int[] numSkipped;           // number of docs skipped per level

  private int[] skipDoc;              // doc id of current skip entry per level
  private int lastDoc;                // doc id of last read skip entry with docId <= target
  private long[] childPointer;        // child pointer of current skip entry per level
  private long lastChildPointer;      // childPointer of last read skip entry with docId <= target

  private boolean inputIsBuffered;

  public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
    this.skipStream = new IndexInput[maxSkipLevels];
    this.skipPointer = new long[maxSkipLevels];
    this.childPointer = new long[maxSkipLevels];
    this.numSkipped = new int[maxSkipLevels];
    this.maxNumberOfSkipLevels = maxSkipLevels;
    this.skipInterval = new int[maxSkipLevels];
    this.skipStream[0] = skipStream;
    this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
    this.skipInterval[0] = skipInterval;
    for (int i = 1; i < maxSkipLevels; i++) {
      // cache skip intervals
      this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
    }
    skipDoc = new int[maxSkipLevels];
  }

  /** Returns the id of the doc to which the last call of {@link #skipTo(int)}
   *  has skipped.  */
  int getDoc() {
    return lastDoc;
  }

  /** Skips entries to the first beyond the current whose document number is
   *  greater than or equal to <i>target</i>. Returns the current doc count.
   */
  int skipTo(int target) throws IOException {
    if (!haveSkipped) {
      // first time, load skip levels
      loadSkipLevels();
      haveSkipped = true;
    }

    // walk up the levels until highest level is found that has a skip
    // for this target
    int level = 0;
    while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
      level++;
    }

    while (level >= 0) {
      if (target > skipDoc[level]) {
        if (!loadNextSkip(level)) {
          continue;
        }
      } else {
        // no more skips on this level, go down one level
        if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
          seekChild(level - 1);
        }
        level--;
      }
    }

    return numSkipped[0] - skipInterval[0] - 1;
  }

  private boolean loadNextSkip(int level) throws IOException {
    // we have to skip, the target document is greater than the current
    // skip list entry
    setLastSkipData(level);

    numSkipped[level] += skipInterval[level];

    if (numSkipped[level] > docCount) {
      // this skip list is exhausted
      skipDoc[level] = Integer.MAX_VALUE;
      if (numberOfSkipLevels > level) numberOfSkipLevels = level;
      return false;
    }

    // read next skip entry
    skipDoc[level] += readSkipData(level, skipStream[level]);

    if (level != 0) {
      // read the child pointer if we are not on the leaf level
      childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
    }

    return true;
  }

  /** Seeks the skip entry on the given level */
  protected void seekChild(int level) throws IOException {
    skipStream[level].seek(lastChildPointer);
    numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
    skipDoc[level] = lastDoc;
    if (level > 0) {
      childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
    }
  }

  void close() throws IOException {
    for (int i = 1; i < skipStream.length; i++) {
      if (skipStream[i] != null) {
        skipStream[i].close();
      }
    }
  }

  /** initializes the reader */
  void init(long skipPointer, int df) {
    this.skipPointer[0] = skipPointer;
    this.docCount = df;
    Arrays.fill(skipDoc, 0);
    Arrays.fill(numSkipped, 0);
    Arrays.fill(childPointer, 0);

    haveSkipped = false;
    for (int i = 1; i < numberOfSkipLevels; i++) {
      skipStream[i] = null;
    }
  }

  /** Loads the skip levels */
  private void loadSkipLevels() throws IOException {
    numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
    if (numberOfSkipLevels > maxNumberOfSkipLevels) {
      numberOfSkipLevels = maxNumberOfSkipLevels;
    }

    skipStream[0].seek(skipPointer[0]);

    int toBuffer = numberOfLevelsToBuffer;

    for (int i = numberOfSkipLevels - 1; i > 0; i--) {
      // the length of the current level
      long length = skipStream[0].readVLong();

      // the start pointer of the current level
      skipPointer[i] = skipStream[0].getFilePointer();
      if (toBuffer > 0) {
        // buffer this level
        skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
        toBuffer--;
      } else {
        // clone this stream, it is already at the start of the current level
        skipStream[i] = (IndexInput) skipStream[0].clone();
        if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
          ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
        }

        // move base stream beyond the current level
        skipStream[0].seek(skipStream[0].getFilePointer() + length);
      }
    }

    // use base stream for the lowest level
    skipPointer[0] = skipStream[0].getFilePointer();
  }

  /**
   * Subclasses must implement the actual skip data encoding in this method.
   *
   * @param level the level skip data shall be read from
   * @param skipStream the skip stream to read from
   */
  protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;

  /** Copies the values of the last read skip entry on this level */
  protected void setLastSkipData(int level) {
    lastDoc = skipDoc[level];
    lastChildPointer = childPointer[level];
  }

  /** used to buffer the top skip levels */
  private final static class SkipBuffer extends IndexInput {
    private byte[] data;
    private long pointer;
    private int pos;

    SkipBuffer(IndexInput input, int length) throws IOException {
      data = new byte[length];
      pointer = input.getFilePointer();
      input.readBytes(data, 0, length);
    }

    public void close() throws IOException {
      data = null;
    }

    public long getFilePointer() {
      return pointer + pos;
    }

    public long length() {
      return data.length;
    }

    public byte readByte() throws IOException {
      return data[pos++];
    }

    public void readBytes(byte[] b, int offset, int len) throws IOException {
      System.arraycopy(data, pos, b, offset, len);
      pos += len;
    }

    public void seek(long pos) throws IOException {
      this.pos = (int) (pos - pointer);
    }

  }
}
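A quick sanity check of the level-count formula used in loadSkipLevels above (and in MultiLevelSkipListWriter's constructor): the number of skip levels grows logarithmically with the document frequency. The df values are illustrative only.

public class SkipLevelMath {
  public static void main(String[] args) {
    int skipInterval = 16;
    int[] dfs = {15, 16, 256, 4096, 1000000};
    for (int i = 0; i < dfs.length; i++) {
      int df = dfs[i];
      // same formula as MultiLevelSkipListReader.loadSkipLevels
      int levels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));
      System.out.println("df=" + df + " -> " + levels + " skip level(s)");
    }
  }
}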
@@ -1,151 +1,151 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;

/**
 * This abstract class writes skip lists with multiple levels.
 *
 * Example for skipInterval = 3:
 *                                                     c            (skip level 2)
 *                 c                 c                 c            (skip level 1)
 *     x     x     x     x     x     x     x     x     x     x      (skip level 0)
 * d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d  (posting list)
 *     3     6     9     12    15    18    21    24    27    30     (df)
 *
 * d - document
 * x - skip data
 * c - skip data with child pointer
 *
 * Skip level i contains every skipInterval-th entry from skip level i-1.
 * Therefore the number of entries on level i is: floor(df / (skipInterval ^ (i + 1))).
 *
 * Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
 * This guarantees a logarithmic amount of skips to find the target document.
 *
 * While this class takes care of writing the different skip levels,
 * subclasses must define the actual format of the skip data.
 *
 */
abstract class MultiLevelSkipListWriter {
  // number of levels in this skip list
  private int numberOfSkipLevels;

  // the skip interval in the list with level = 0
  private int skipInterval;

  // for every skip level a different buffer is used
  private RAMOutputStream[] skipBuffer;

  protected MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) {
    this.skipInterval = skipInterval;

    // calculate the maximum number of skip levels for this document frequency
    numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));

    // make sure it does not exceed maxSkipLevels
    if (numberOfSkipLevels > maxSkipLevels) {
      numberOfSkipLevels = maxSkipLevels;
    }
  }

  protected void init() {
    skipBuffer = new RAMOutputStream[numberOfSkipLevels];
    for (int i = 0; i < numberOfSkipLevels; i++) {
      skipBuffer[i] = new RAMOutputStream();
    }
  }

  protected void resetSkip() {
    // creates new buffers or empties the existing ones
    if (skipBuffer == null) {
      init();
    } else {
      for (int i = 0; i < skipBuffer.length; i++) {
        skipBuffer[i].reset();
      }
    }
  }

  /**
   * Subclasses must implement the actual skip data encoding in this method.
   *
   * @param level the level the skip data shall be written for
   * @param skipBuffer the skip buffer to write to
   */
  protected abstract void writeSkipData(int level, IndexOutput skipBuffer) throws IOException;

  /**
   * Writes the current skip data to the buffers. The current document frequency determines
   * the max level the skip data is written to.
   *
   * @param df the current document frequency
   * @throws IOException
   */
  void bufferSkip(int df) throws IOException {
    int numLevels;

    // determine max level
    for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) {
      numLevels++;
    }

    long childPointer = 0;

    for (int level = 0; level < numLevels; level++) {
      writeSkipData(level, skipBuffer[level]);

      long newChildPointer = skipBuffer[level].getFilePointer();

      if (level != 0) {
        // store child pointers for all levels except the lowest
        skipBuffer[level].writeVLong(childPointer);
      }

      // remember the childPointer for the next level
      childPointer = newChildPointer;
    }
  }

  /**
   * Writes the buffered skip lists to the given output.
   *
   * @param output the IndexOutput the skip lists shall be written to
   * @return the pointer the skip list starts
   */
  long writeSkip(IndexOutput output) throws IOException {
    long skipPointer = output.getFilePointer();
    if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;

    for (int level = numberOfSkipLevels - 1; level > 0; level--) {
      long length = skipBuffer[level].getFilePointer();
      if (length > 0) {
        output.writeVLong(length);
        skipBuffer[level].writeTo(output);
      }
    }
    skipBuffer[0].writeTo(output);

    return skipPointer;
  }

}
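As a concrete reading of the diagram in the class javadoc (skipInterval = 3), this sketch replays the level determination loop from bufferSkip for each document count and shows which levels receive an entry: every 3rd document reaches level 0, every 9th also level 1, every 27th also level 2. The loop body mirrors the code above; only the printing is added.

public class BufferSkipLevels {
  public static void main(String[] args) {
    int skipInterval = 3;
    int numberOfSkipLevels = 3;
    for (int df = 3; df <= 30; df += 3) {
      // same max-level determination as MultiLevelSkipListWriter.bufferSkip
      int numLevels = 0;
      int d = df;
      while ((d % skipInterval) == 0 && numLevels < numberOfSkipLevels) {
        numLevels++;
        d /= skipInterval;
      }
      System.out.println("df=" + df + ": skip entry on levels 0.." + (numLevels - 1));
    }
  }
}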
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMOutputStream;
|
||||
|
||||
/**
|
||||
 * This abstract class writes skip lists with multiple levels.
 *
 * Example for skipInterval = 3:
 *
 *                                                     c            (skip level 2)
 *                 c                 c                 c            (skip level 1)
 *     x     x     x     x     x     x     x     x     x     x      (skip level 0)
 * d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
 *     3     6     9     12    15    18    21    24    27    30     (df)
 *
 * d - document
 * x - skip data
 * c - skip data with child pointer
 *
 * Skip level i contains every skipInterval-th entry from skip level i-1.
 * Therefore the number of entries on level i is: floor(df / (skipInterval ^ (i + 1))).
 *
 * Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
 * This guarantees a logarithmic number of skips to find the target document.
 *
 * While this class takes care of writing the different skip levels,
 * subclasses must define the actual format of the skip data.
 *
 */
abstract class MultiLevelSkipListWriter {
  // number of levels in this skip list
  private int numberOfSkipLevels;

  // the skip interval in the list with level = 0
  private int skipInterval;

  // for every skip level a different buffer is used
  private RAMOutputStream[] skipBuffer;

  protected MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) {
    this.skipInterval = skipInterval;

    // calculate the maximum number of skip levels for this document frequency
    numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));

    // make sure it does not exceed maxSkipLevels
    if (numberOfSkipLevels > maxSkipLevels) {
      numberOfSkipLevels = maxSkipLevels;
    }
  }

  protected void init() {
    skipBuffer = new RAMOutputStream[numberOfSkipLevels];
    for (int i = 0; i < numberOfSkipLevels; i++) {
      skipBuffer[i] = new RAMOutputStream();
    }
  }

  protected void resetSkip() {
    // creates new buffers or empties the existing ones
    if (skipBuffer == null) {
      init();
    } else {
      for (int i = 0; i < skipBuffer.length; i++) {
        skipBuffer[i].reset();
      }
    }
  }

  /**
   * Subclasses must implement the actual skip data encoding in this method.
   *
   * @param level the level the skip data shall be written for
   * @param skipBuffer the skip buffer to write to
   */
  protected abstract void writeSkipData(int level, IndexOutput skipBuffer) throws IOException;

  /**
   * Writes the current skip data to the buffers. The current document frequency
   * determines the max level the skip data is written to.
   *
   * @param df the current document frequency
   * @throws IOException
   */
  void bufferSkip(int df) throws IOException {
    int numLevels;

    // determine max level
    for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) {
      numLevels++;
    }

    long childPointer = 0;

    for (int level = 0; level < numLevels; level++) {
      writeSkipData(level, skipBuffer[level]);

      long newChildPointer = skipBuffer[level].getFilePointer();

      if (level != 0) {
        // store child pointers for all levels except the lowest
        skipBuffer[level].writeVLong(childPointer);
      }

      // remember the childPointer for the next level
      childPointer = newChildPointer;
    }
  }

  /**
   * Writes the buffered skip lists to the given output.
   *
   * @param output the IndexOutput the skip lists shall be written to
   * @return the pointer where the skip list starts
   */
  long writeSkip(IndexOutput output) throws IOException {
    long skipPointer = output.getFilePointer();
    if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;

    for (int level = numberOfSkipLevels - 1; level > 0; level--) {
      long length = skipBuffer[level].getFilePointer();
      if (length > 0) {
        output.writeVLong(length);
        skipBuffer[level].writeTo(output);
      }
    }
    skipBuffer[0].writeTo(output);

    return skipPointer;
  }

}
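
A minimal sketch of how a concrete subclass might plug into this class. The DocSkipWriter name and its delta-only format are illustrative assumptions, not the richer format Lucene's actual DefaultSkipListWriter uses (which also tracks freq/prox pointers and payload lengths); it assumes the same package plus the usual IndexOutput and IOException imports:

  // Hypothetical subclass for illustration only: each skip entry stores the
  // delta of the skipped-to document number on that level.
  class DocSkipWriter extends MultiLevelSkipListWriter {
    private int[] lastSkipDoc;  // last doc written per level
    private int curDoc;         // doc to record on the next bufferSkip()

    DocSkipWriter(int skipInterval, int maxSkipLevels, int df) {
      super(skipInterval, maxSkipLevels, df);
      lastSkipDoc = new int[maxSkipLevels];
    }

    void setSkipData(int doc) {
      this.curDoc = doc;
    }

    protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
      // delta-encode the doc number relative to the last entry on this level
      skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
      lastSkipDoc[level] = curDoc;
    }
  }

A caller would drive it the way this class expects: resetSkip() once per term, setSkipData(doc) followed by bufferSkip(df) after every skipInterval-th document, and finally writeSkip(output) to flush the buffered levels.
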
@@ -1,163 +1,163 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Serializable;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

/**
 * A Payload is metadata that can be stored together with each occurrence
 * of a term. This metadata is stored inline in the posting list of the
 * specific term.
 * <p>
 * To store payloads in the index a {@link TokenStream} has to be used that
 * produces {@link Token}s containing payload data.
 * <p>
 * Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
 * to retrieve the payloads from the index.
 */
public class Payload implements Serializable, Cloneable {
  /** the byte array containing the payload data */
  protected byte[] data;

  /** the offset within the byte array */
  protected int offset;

  /** the length of the payload data */
  protected int length;

  /** Creates an empty payload and does not allocate a byte array. */
  public Payload() {
    // nothing to do
  }

  /**
   * Creates a new payload with the given array as data.
   * A reference to the passed-in array is held, i.e. no
   * copy is made.
   *
   * @param data the data of this payload
   */
  public Payload(byte[] data) {
    this(data, 0, data.length);
  }

  /**
   * Creates a new payload with the given array as data.
   * A reference to the passed-in array is held, i.e. no
   * copy is made.
   *
   * @param data the data of this payload
   * @param offset the offset in the data byte array
   * @param length the length of the data
   */
  public Payload(byte[] data, int offset, int length) {
    if (offset < 0 || offset + length > data.length) {
      throw new IllegalArgumentException();
    }
    this.data = data;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Sets this payload's data.
   * A reference to the passed-in array is held, i.e. no
   * copy is made.
   */
  public void setData(byte[] data) {
    setData(data, 0, data.length);
  }

  /**
   * Sets this payload's data.
   * A reference to the passed-in array is held, i.e. no
   * copy is made.
   */
  public void setData(byte[] data, int offset, int length) {
    this.data = data;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Returns a reference to the underlying byte array
   * that holds this payload's data.
   */
  public byte[] getData() {
    return this.data;
  }

  /**
   * Returns the offset in the underlying byte array
   */
  public int getOffset() {
    return this.offset;
  }

  /**
   * Returns the length of the payload data.
   */
  public int length() {
    return this.length;
  }

  /**
   * Returns the byte at the given index.
   */
  public byte byteAt(int index) {
    if (0 <= index && index < this.length) {
      return this.data[this.offset + index];
    }
    throw new ArrayIndexOutOfBoundsException(index);
  }

  /**
   * Allocates a new byte array, copies the payload data into it and returns it.
   */
  public byte[] toByteArray() {
    byte[] retArray = new byte[this.length];
    System.arraycopy(this.data, this.offset, retArray, 0, this.length);
    return retArray;
  }

  /**
   * Copies the payload data to a byte array.
   *
   * @param target the target byte array
   * @param targetOffset the offset in the target byte array
   */
  public void copyTo(byte[] target, int targetOffset) {
    // check that the payload fits into target starting at targetOffset
    if (this.length + targetOffset > target.length) {
      throw new ArrayIndexOutOfBoundsException();
    }
    System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
  }

  /**
   * Clones this payload by creating a copy of the underlying
   * byte array.
   */
  public Object clone() {
    Payload clone = new Payload(this.toByteArray());
    return clone;
  }
}
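
A brief usage sketch (the buffer contents are made-up values): a Payload wraps a slice of an existing array without copying it, and offers copying accessors when a private copy is wanted:

  byte[] buffer = new byte[] {0, 0, 7, 42, 0};  // hypothetical shared buffer
  Payload payload = new Payload(buffer, 2, 2);  // refers to bytes {7, 42}, no copy

  byte first = payload.byteAt(0);               // 7
  byte[] copy = payload.toByteArray();          // fresh array {7, 42}

  byte[] target = new byte[4];
  payload.copyTo(target, 1);                    // target is now {0, 7, 42, 0}
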
@@ -1,257 +1,257 @@
package org.apache.lucene.search;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReader;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Collection;
import java.util.Set;

/**
 * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
 * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
 * This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be
 * combined equivalently into a single search field).  We want the primary score to be the one associated with the highest boost,
 * not the sum of the field scores (as BooleanQuery would give).
 * If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
 * another gets a higher score than "albino" matching both fields.
 * To get this result, use both BooleanQuery and DisjunctionMaxQuery:  for each term a DisjunctionMaxQuery searches for it in
 * each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
 * The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that
 * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms
 * in the multiple fields.
 * @author Chuck Williams
 */
public class DisjunctionMaxQuery extends Query {

  /* The subqueries */
  private ArrayList disjuncts = new ArrayList();

  /* Multiple of the non-max disjunct scores added into our final score.  Non-zero values support tie-breaking. */
  private float tieBreakerMultiplier = 0.0f;

  /** Creates a new empty DisjunctionMaxQuery.  Use add() to add the subqueries.
   * @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied by this weight
   *        and added into the final score.  If non-zero, the value should be small, on the order of 0.1, which says that
   *        10 occurrences of a word in a lower-scored field that is also in a higher scored field is just as good as a unique
   *        word in the lower scored field (i.e., one that is not in any higher scored field).
   */
  public DisjunctionMaxQuery(float tieBreakerMultiplier) {
    this.tieBreakerMultiplier = tieBreakerMultiplier;
  }

  /**
   * Creates a new DisjunctionMaxQuery
   * @param disjuncts a Collection<Query> of all the disjuncts to add
   * @param tieBreakerMultiplier the weight to give to each matching non-maximum disjunct
   */
  public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) {
    this.tieBreakerMultiplier = tieBreakerMultiplier;
    add(disjuncts);
  }

  /** Add a subquery to this disjunction
   * @param query the disjunct added
   */
  public void add(Query query) {
    disjuncts.add(query);
  }

  /** Add a collection of disjuncts to this disjunction
   * via Iterable<Query>
   */
  public void add(Collection disjuncts) {
    this.disjuncts.addAll(disjuncts);
  }

  /** An Iterator<Query> over the disjuncts */
  public Iterator iterator() {
    return disjuncts.iterator();
  }

  /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */
  private class DisjunctionMaxWeight implements Weight {

    private Similarity similarity;                // The similarity with which we are associated.
    private ArrayList weights = new ArrayList();  // The Weights for our subqueries, in 1-1 correspondence with disjuncts

    /* Construct the Weight for this Query searched by searcher.  Recursively construct subquery weights. */
    public DisjunctionMaxWeight(Searcher searcher) throws IOException {
      this.similarity = searcher.getSimilarity();
      for (int i = 0; i < disjuncts.size(); i++)
        weights.add(((Query) disjuncts.get(i)).createWeight(searcher));
    }

    /* Return our associated DisjunctionMaxQuery */
    public Query getQuery() { return DisjunctionMaxQuery.this; }

    /* Return our boost */
    public float getValue() { return getBoost(); }

    /* Compute the sum of squared weights of us applied to our subqueries.  Used for normalization. */
    public float sumOfSquaredWeights() throws IOException {
      float max = 0.0f, sum = 0.0f;
      for (int i = 0; i < weights.size(); i++) {
        float sub = ((Weight) weights.get(i)).sumOfSquaredWeights();
        sum += sub;
        max = Math.max(max, sub);
      }
      return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * getBoost() * getBoost();
    }

    /* Apply the computed normalization factor to our subqueries */
    public void normalize(float norm) {
      norm *= getBoost();  // Incorporate our boost
      for (int i = 0; i < weights.size(); i++)
        ((Weight) weights.get(i)).normalize(norm);
    }

    /* Create the scorer used to score our associated DisjunctionMaxQuery */
    public Scorer scorer(IndexReader reader) throws IOException {
      DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, similarity);
      for (int i = 0; i < weights.size(); i++) {
        Weight w = (Weight) weights.get(i);
        Scorer subScorer = w.scorer(reader);
        if (subScorer == null) return null;
        result.add(subScorer);
      }
      return result;
    }

    /* Explain the score we computed for doc */
    public Explanation explain(IndexReader reader, int doc) throws IOException {
      if (disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader, doc);
      ComplexExplanation result = new ComplexExplanation();
      float max = 0.0f, sum = 0.0f;
      result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");
      for (int i = 0; i < weights.size(); i++) {
        Explanation e = ((Weight) weights.get(i)).explain(reader, doc);
        if (e.isMatch()) {
          result.setMatch(Boolean.TRUE);
          result.addDetail(e);
          sum += e.getValue();
          max = Math.max(max, e.getValue());
        }
      }
      result.setValue(max + (sum - max) * tieBreakerMultiplier);
      return result;
    }

  }  // end of DisjunctionMaxWeight inner class

  /* Create the Weight used to score us */
  protected Weight createWeight(Searcher searcher) throws IOException {
    return new DisjunctionMaxWeight(searcher);
  }

  /** Optimize our representation and our subqueries representations
   * @param reader the IndexReader we query
   * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */
  public Query rewrite(IndexReader reader) throws IOException {
    if (disjuncts.size() == 1) {
      Query singleton = (Query) disjuncts.get(0);
      Query result = singleton.rewrite(reader);
      if (getBoost() != 1.0f) {
        if (result == singleton) result = (Query) result.clone();
        result.setBoost(getBoost() * result.getBoost());
      }
      return result;
    }
    DisjunctionMaxQuery clone = null;
    for (int i = 0; i < disjuncts.size(); i++) {
      Query clause = (Query) disjuncts.get(i);
      Query rewrite = clause.rewrite(reader);
      if (rewrite != clause) {
        if (clone == null) clone = (DisjunctionMaxQuery) this.clone();
        clone.disjuncts.set(i, rewrite);
      }
    }
    if (clone != null) return clone;
    else return this;
  }

  /** Create a shallow copy of us -- used in rewriting if necessary
   * @return a copy of us (but reuse, don't copy, our subqueries) */
  public Object clone() {
    DisjunctionMaxQuery clone = (DisjunctionMaxQuery) super.clone();
    clone.disjuncts = (ArrayList) this.disjuncts.clone();
    return clone;
  }

  // inherit javadoc
  public void extractTerms(Set terms) {
    for (int i = 0; i < disjuncts.size(); i++) {
      ((Query) disjuncts.get(i)).extractTerms(terms);
    }
  }

  /** Prettyprint us.
   * @param field the field to which we are applied
   * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"
   */
  public String toString(String field) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("(");
    for (int i = 0; i < disjuncts.size(); i++) {
      Query subquery = (Query) disjuncts.get(i);
      if (subquery instanceof BooleanQuery) {  // wrap sub-bools in parens
        buffer.append("(");
        buffer.append(subquery.toString(field));
        buffer.append(")");
      }
      else buffer.append(subquery.toString(field));
      if (i != disjuncts.size() - 1) buffer.append(" | ");
    }
    buffer.append(")");
    if (tieBreakerMultiplier != 0.0f) {
      buffer.append("~");
      buffer.append(tieBreakerMultiplier);
    }
    if (getBoost() != 1.0) {
      buffer.append("^");
      buffer.append(getBoost());
    }
    return buffer.toString();
  }

  /** Return true iff we represent the same query as o
   * @param o another object
   * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
   */
  public boolean equals(Object o) {
    if (!(o instanceof DisjunctionMaxQuery)) return false;
    DisjunctionMaxQuery other = (DisjunctionMaxQuery) o;
    return this.getBoost() == other.getBoost()
           && this.tieBreakerMultiplier == other.tieBreakerMultiplier
           && this.disjuncts.equals(other.disjuncts);
  }

  /** Compute a hash code for hashing us
   * @return the hash code
   */
  public int hashCode() {
    return Float.floatToIntBits(getBoost())
           + Float.floatToIntBits(tieBreakerMultiplier)
           + disjuncts.hashCode();
  }

}
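
A concrete sketch of the "albino elephant" recipe from the class comment above. The field names "title" and "body" and the 0.1 tie breaker are illustrative choices, assuming the usual org.apache.lucene.search and org.apache.lucene.index imports:

  // One DisjunctionMaxQuery per term, taking the max across fields...
  DisjunctionMaxQuery albino = new DisjunctionMaxQuery(0.1f);
  albino.add(new TermQuery(new Term("title", "albino")));
  albino.add(new TermQuery(new Term("body", "albino")));

  DisjunctionMaxQuery elephant = new DisjunctionMaxQuery(0.1f);
  elephant.add(new TermQuery(new Term("title", "elephant")));
  elephant.add(new TermQuery(new Term("body", "elephant")));

  // ...combined across terms with a BooleanQuery, so a document matching
  // both terms (in any fields) outscores one matching a single term twice.
  BooleanQuery query = new BooleanQuery();
  query.add(albino, BooleanClause.Occur.SHOULD);
  query.add(elephant, BooleanClause.Occur.SHOULD);
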
@@ -1,195 +1,195 @@
package org.apache.lucene.search;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;

/**
 * The Scorer for DisjunctionMaxQuery's.  The union of all documents generated by the subquery scorers
 * is generated in document number order.  The score for each document is the maximum of the scores computed
 * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
 * for the other subqueries that generate the document.
 * @author Chuck Williams
 */
class DisjunctionMaxScorer extends Scorer {

  /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */
  private ArrayList subScorers = new ArrayList();

  /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
  private float tieBreakerMultiplier;

  private boolean more = false;      // True iff there is a next document
  private boolean firstTime = true;  // True iff next() has not yet been called

  /** Creates a new instance of DisjunctionMaxScorer
   * @param tieBreakerMultiplier Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result.
   * @param similarity -- not used since our definition involves neither coord nor terms directly */
  public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity) {
    super(similarity);
    this.tieBreakerMultiplier = tieBreakerMultiplier;
  }

  /** Add the scorer for a subquery
   * @param scorer the scorer of a subquery of our associated DisjunctionMaxQuery
   */
  public void add(Scorer scorer) throws IOException {
    if (scorer.next()) {  // Initialize and retain only if it produces docs
      subScorers.add(scorer);
      more = true;
    }
  }

  /** Generate the next document matching our associated DisjunctionMaxQuery.
   * @return true iff there is a next document
   */
  public boolean next() throws IOException {
    if (!more) return false;
    if (firstTime) {
      heapify();
      firstTime = false;
      return true;  // more would have been false if no subScorers had any docs
    }
    // Increment all generators that generated the last doc and adjust the heap.
    int lastdoc = ((Scorer) subScorers.get(0)).doc();
    do {
      if (((Scorer) subScorers.get(0)).next())
        heapAdjust(0);
      else {
        heapRemoveRoot();
        if (subScorers.isEmpty()) return (more = false);
      }
    } while (((Scorer) subScorers.get(0)).doc() == lastdoc);
    return true;
  }

  /** Determine the current document number.  Initially invalid, until {@link #next()} is called the first time.
   * @return the document number of the currently generated document
   */
  public int doc() {
    return ((Scorer) subScorers.get(0)).doc();
  }

  /** Determine the current document score.  Initially invalid, until {@link #next()} is called the first time.
   * @return the score of the current generated document
   */
  public float score() throws IOException {
    int doc = ((Scorer) subScorers.get(0)).doc();
    float[] sum = {((Scorer) subScorers.get(0)).score()}, max = {sum[0]};
    int size = subScorers.size();
    scoreAll(1, size, doc, sum, max);
    scoreAll(2, size, doc, sum, max);
    return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier;
  }

  // Recursively iterate all subScorers that generated last doc computing sum and max
  private void scoreAll(int root, int size, int doc, float[] sum, float[] max) throws IOException {
    if (root < size && ((Scorer) subScorers.get(root)).doc() == doc) {
      float sub = ((Scorer) subScorers.get(root)).score();
      sum[0] += sub;
      max[0] = Math.max(max[0], sub);
      scoreAll((root << 1) + 1, size, doc, sum, max);
      scoreAll((root << 1) + 2, size, doc, sum, max);
    }
  }

  /** Advance to the first document beyond the current whose number is greater than or equal to target.
   * @param target the minimum number of the next desired document
   * @return true iff there is a document to be generated whose number is at least target
   */
  public boolean skipTo(int target) throws IOException {
    if (firstTime) {
      if (!more) return false;
      heapify();
      firstTime = false;
    }

    while (subScorers.size() > 0 && ((Scorer) subScorers.get(0)).doc() < target) {
      if (((Scorer) subScorers.get(0)).skipTo(target))
        heapAdjust(0);
      else
        heapRemoveRoot();
    }
    if (subScorers.size() == 0)
      return (more = false);
    return true;
  }

  /** Explain a score that we computed.  UNSUPPORTED -- see explanation capability in DisjunctionMaxQuery.
   * @param doc the number of a document we scored
   * @return the Explanation for our score
   */
  public Explanation explain(int doc) throws IOException {
    throw new UnsupportedOperationException();
  }

  // Organize subScorers into a min heap with scorers generating the earliest document on top.
  private void heapify() {
    int size = subScorers.size();
    for (int i = (size >> 1) - 1; i >= 0; i--)
      heapAdjust(i);
  }

  /* The subtree of subScorers at root is a min heap except possibly for its root element.
   * Bubble the root down as required to make the subtree a heap.
   */
  private void heapAdjust(int root) {
    Scorer scorer = (Scorer) subScorers.get(root);
    int doc = scorer.doc();
    int i = root, size = subScorers.size();
    while (i <= (size >> 1) - 1) {
      int lchild = (i << 1) + 1;
      Scorer lscorer = (Scorer) subScorers.get(lchild);
      int ldoc = lscorer.doc();
      int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2;
      Scorer rscorer = null;
      if (rchild < size) {
        rscorer = (Scorer) subScorers.get(rchild);
        rdoc = rscorer.doc();
      }
      if (ldoc < doc) {
        if (rdoc < ldoc) {
          subScorers.set(i, rscorer);
          subScorers.set(rchild, scorer);
          i = rchild;
        } else {
          subScorers.set(i, lscorer);
          subScorers.set(lchild, scorer);
          i = lchild;
        }
      } else if (rdoc < doc) {
        subScorers.set(i, rscorer);
        subScorers.set(rchild, scorer);
        i = rchild;
      } else return;
    }
  }

  // Remove the root Scorer from subScorers and re-establish it as a heap
  private void heapRemoveRoot() {
    int size = subScorers.size();
    if (size == 1)
      subScorers.remove(0);
    else {
      subScorers.set(0, subScorers.get(size - 1));
      subScorers.remove(size - 1);
      heapAdjust(0);
    }
  }

}
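
A worked instance of the scoring rule above, with assumed sub-scores: suppose a document is matched by two subqueries scoring 0.8 and 0.3, and tieBreakerMultiplier is 0.1. Then max = 0.8 and sum = 1.1, so score() returns max + (sum - max) * tieBreakerMultiplier = 0.8 + 0.3 * 0.1 = 0.83. With tieBreakerMultiplier = 0 the second match would add nothing; with 1.0 the sub-scores would simply be summed, as in a plain disjunction.
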
@@ -1,125 +1,125 @@
package org.apache.lucene.util;
/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/** Floating point numbers smaller than 32 bits.
 *
 * @author yonik
 * @version $Id$
 */
public class SmallFloat {

  /** Converts a 32 bit float to an 8 bit float.
   * <br>Values less than zero are all mapped to zero.
   * <br>Values are truncated (rounded down) to the nearest 8 bit value.
   * <br>Values between zero and the smallest representable value
   * are rounded up.
   *
   * @param f the 32 bit float to be converted to an 8 bit float (byte)
   * @param numMantissaBits the number of mantissa bits to use in the byte, with the remainder to be used in the exponent
   * @param zeroExp the zero-point in the range of exponent values
   * @return the 8 bit float representation
   */
  public static byte floatToByte(float f, int numMantissaBits, int zeroExp) {
    // Adjustment from a float zero exponent to our zero exponent,
    // shifted over to our exponent position.
    int fzero = (63-zeroExp)<<numMantissaBits;
    int bits = Float.floatToRawIntBits(f);
    int smallfloat = bits >> (24-numMantissaBits);
    if (smallfloat < fzero) {
      return (bits<=0) ?
              (byte)0   // negative numbers and zero both map to 0 byte
             :(byte)1;  // underflow is mapped to smallest non-zero number.
    } else if (smallfloat >= fzero + 0x100) {
      return -1;        // overflow maps to largest number
    } else {
      return (byte)(smallfloat - fzero);
    }
  }

  /** Converts an 8 bit float to a 32 bit float. */
  public static float byteToFloat(byte b, int numMantissaBits, int zeroExp) {
    // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
    // is only a little bit faster (anywhere from 0% to 7%)
    if (b == 0) return 0.0f;
    int bits = (b&0xff) << (24-numMantissaBits);
    bits += (63-zeroExp) << 24;
    return Float.intBitsToFloat(bits);
  }


  //
  // Some specializations of the generic functions follow.
  // The generic functions are just as fast with current (1.5)
  // -server JVMs, but still slower with client JVMs.
  //

  /** floatToByte(b, mantissaBits=3, zeroExponent=15)
   * <br>smallest non-zero value = 5.820766E-10
   * <br>largest value = 7.5161928E9
   * <br>epsilon = 0.125
   */
  public static byte floatToByte315(float f) {
    int bits = Float.floatToRawIntBits(f);
    int smallfloat = bits >> (24-3);
    if (smallfloat < (63-15)<<3) {
      return (bits<=0) ? (byte)0 : (byte)1;
    }
    if (smallfloat >= ((63-15)<<3) + 0x100) {
      return -1;
    }
    return (byte)(smallfloat - ((63-15)<<3));
  }

  /** byteToFloat(b, mantissaBits=3, zeroExponent=15) */
  public static float byte315ToFloat(byte b) {
    // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
    // is only a little bit faster (anywhere from 0% to 7%)
    if (b == 0) return 0.0f;
    int bits = (b&0xff) << (24-3);
    bits += (63-15) << 24;
    return Float.intBitsToFloat(bits);
  }


  /** floatToByte(b, mantissaBits=5, zeroExponent=2)
   * <br>smallest nonzero value = 0.033203125
   * <br>largest value = 1984.0
   * <br>epsilon = 0.03125
   */
  public static byte floatToByte52(float f) {
    int bits = Float.floatToRawIntBits(f);
    int smallfloat = bits >> (24-5);
    if (smallfloat < (63-2)<<5) {
      return (bits<=0) ? (byte)0 : (byte)1;
    }
    if (smallfloat >= ((63-2)<<5) + 0x100) {
      return -1;
    }
    return (byte)(smallfloat - ((63-2)<<5));
  }

  /** byteToFloat(b, mantissaBits=5, zeroExponent=2) */
  public static float byte52ToFloat(byte b) {
    // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
    // is only a little bit faster (anywhere from 0% to 7%)
    if (b == 0) return 0.0f;
    int bits = (b&0xff) << (24-5);
    bits += (63-2) << 24;
    return Float.intBitsToFloat(bits);
  }
}
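
A small worked round trip under the 3-mantissa-bit, zero-exponent-15 encoding; the input values 1.5f and 1.3f are arbitrary choices to show exact and truncated cases:

  byte b = SmallFloat.floatToByte315(1.5f);  // 1.5 = 1.1 binary, fits in 3 mantissa bits
  float f = SmallFloat.byte315ToFloat(b);    // f == 1.5f, exact round trip

  byte c = SmallFloat.floatToByte315(1.3f);  // 1.3 needs more than 3 mantissa bits
  float g = SmallFloat.byte315ToFloat(c);    // g == 1.25f: truncated (rounded down), as documented
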
@ -1,103 +1,103 @@
package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.util.LuceneTestCase;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TestCachingTokenFilter extends LuceneTestCase {
  private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};

  public void testCaching() throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TokenStream stream = new TokenStream() {
      private int index = 0;

      public Token next() throws IOException {
        if (index == tokens.length) {
          return null;
        } else {
          return new Token(tokens[index++], 0, 0);
        }
      }

    };

    stream = new CachingTokenFilter(stream);

    doc.add(new Field("preanalyzed", stream, TermVector.NO));

    // 1) we consume all tokens twice before we add the doc to the index
    checkTokens(stream);
    stream.reset();
    checkTokens(stream);

    // 2) now add the document to the index and verify that all tokens are indexed;
    //    don't reset the stream here, the DocumentWriter should do that implicitly
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
    assertTrue(termPositions.next());
    assertEquals(1, termPositions.freq());
    assertEquals(0, termPositions.nextPosition());

    termPositions.seek(new Term("preanalyzed", "term2"));
    assertTrue(termPositions.next());
    assertEquals(2, termPositions.freq());
    assertEquals(1, termPositions.nextPosition());
    assertEquals(3, termPositions.nextPosition());

    termPositions.seek(new Term("preanalyzed", "term3"));
    assertTrue(termPositions.next());
    assertEquals(1, termPositions.freq());
    assertEquals(2, termPositions.nextPosition());
    reader.close();

    // 3) reset the stream and consume the tokens again
    stream.reset();
    checkTokens(stream);
  }

  private void checkTokens(TokenStream stream) throws IOException {
    int count = 0;
    Token token;
    while ((token = stream.next()) != null) {
      assertTrue(count < tokens.length);
      assertEquals(tokens[count], token.termText());
      count++;
    }

    assertEquals(tokens.length, count);
  }
}
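The anonymous TokenStream in the test is single-pass; CachingTokenFilter is what makes the three consume/reset rounds possible. A minimal hedged sketch of the same pattern outside a test, against the pre-2.9 next()/reset() API used throughout this commit (the demo class and the tokenizer choice are illustrative, not part of the patch):

import java.io.IOException;
import java.io.StringReader;

// Hypothetical demo, assumed to live in org.apache.lucene.analysis.
class CachingDemo {
  static void twoPasses() throws IOException {
    TokenStream cached = new CachingTokenFilter(
        new WhitespaceTokenizer(new StringReader("one two three")));
    Token t;
    while ((t = cached.next()) != null) {
      // first pass: e.g. inspect or count the tokens
    }
    cached.reset();  // repositions to the first cached Token
    while ((t = cached.next()) != null) {
      // second pass: e.g. hand the same tokens to the indexer
    }
  }
}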
@ -1,128 +1,128 @@
package org.apache.lucene.analysis;

/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Set;

/**
 * @author yonik
 */
public class TestStopFilter extends LuceneTestCase {

  private final static boolean VERBOSE = false;

  // other StopFilter functionality is already tested by TestStopAnalyzer

  public void testExactCase() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    String[] stopWords = new String[] { "is", "the", "Time" };
    TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords);
    assertEquals("Now", stream.next().termText());
    assertEquals("The", stream.next().termText());
    assertEquals(null, stream.next());
  }

  public void testIgnoreCase() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    String[] stopWords = new String[] { "is", "the", "Time" };
    TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords, true);
    assertEquals("Now", stream.next().termText());
    assertEquals(null, stream.next());
  }

  public void testStopFilt() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    String[] stopWords = new String[] { "is", "the", "Time" };
    Set stopSet = StopFilter.makeStopSet(stopWords);
    TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
    assertEquals("Now", stream.next().termText());
    assertEquals("The", stream.next().termText());
    assertEquals(null, stream.next());
  }

  /**
   * Test position increments applied by StopFilter with and without enabling this option.
   */
  public void testStopPositions() throws IOException {
    StringBuffer sb = new StringBuffer();
    ArrayList a = new ArrayList();
    for (int i=0; i<20; i++) {
      String w = English.intToEnglish(i).trim();
      sb.append(w).append(" ");
      if (i%3 != 0) a.add(w);
    }
    log(sb.toString());
    String stopWords[] = (String[]) a.toArray(new String[0]);
    for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
    Set stopSet = StopFilter.makeStopSet(stopWords);
    // with increments
    StringReader reader = new StringReader(sb.toString());
    StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
    doTestStopPositions(stpf, true);
    // without increments
    reader = new StringReader(sb.toString());
    stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
    doTestStopPositions(stpf, false);
    // with increments, concatenating two stop filters
    ArrayList a0 = new ArrayList();
    ArrayList a1 = new ArrayList();
    for (int i=0; i<a.size(); i++) {
      if (i%2==0) {
        a0.add(a.get(i));
      } else {
        a1.add(a.get(i));
      }
    }
    String stopWords0[] = (String[]) a0.toArray(new String[0]);
    for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
    String stopWords1[] = (String[]) a1.toArray(new String[0]);
    for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
    Set stopSet0 = StopFilter.makeStopSet(stopWords0);
    Set stopSet1 = StopFilter.makeStopSet(stopWords1);
    reader = new StringReader(sb.toString());
    StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
    stpf0.setEnablePositionIncrements(true);
    StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
    doTestStopPositions(stpf01, true);
  }

  private void doTestStopPositions(StopFilter stpf, boolean enableIncrements) throws IOException {
    log("---> test with enable-increments-"+(enableIncrements?"enabled":"disabled"));
    stpf.setEnablePositionIncrements(enableIncrements);
    for (int i=0; i<20; i+=3) {
      Token t = stpf.next();
      log("Token "+i+": "+t);
      String w = English.intToEnglish(i).trim();
      assertEquals("expecting token "+i+" to be "+w, w, t.termText());
      assertEquals("if increments are enabled, all but the first token must have a position increment of 3",
                   enableIncrements ? (i==0 ? 1 : 3) : 1, t.getPositionIncrement());
    }
    assertNull(stpf.next());
  }

  // print debug info depending on VERBOSE
  private static void log(String s) {
    if (VERBOSE) {
      System.out.println(s);
    }
  }
}
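For a concrete feel of what doTestStopPositions asserts: with increments enabled, a kept token's position increment counts the stop words skipped before it, so phrase queries still see the gap. A hedged sketch on a four-word input (hypothetical demo class, same pre-2.9 API as the test):

import java.io.IOException;
import java.io.StringReader;
import java.util.Set;

// Hypothetical demo, assumed to live in org.apache.lucene.analysis.
class StopIncrementsDemo {
  static void show() throws IOException {
    Set stops = StopFilter.makeStopSet(new String[] { "two", "three" });
    StopFilter f = new StopFilter(
        new WhitespaceTokenizer(new StringReader("one two three four")), stops);
    f.setEnablePositionIncrements(true);

    Token t = f.next();
    System.out.println(t.termText() + " +" + t.getPositionIncrement()); // one +1
    t = f.next();
    // "four" carries increment 3: the two removed stop words leave a gap,
    // so "one four" never looks adjacent to a phrase query
    System.out.println(t.termText() + " +" + t.getPositionIncrement()); // four +3
  }
}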
@ -1,108 +1,108 @@
package org.apache.lucene.index;
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;

import java.io.IOException;


public class TestIndexWriterMerging extends LuceneTestCase
{

  /**
   * Tests that index merging (specifically addIndexes()) doesn't
   * change the index order of documents.
   */
  public void testLucene() throws IOException
  {
    int num = 100;

    Directory indexA = new MockRAMDirectory();
    Directory indexB = new MockRAMDirectory();

    fillIndex(indexA, 0, num);
    boolean fail = verifyIndex(indexA, 0);
    if (fail)
    {
      fail("Index a is invalid");
    }

    fillIndex(indexB, num, num);
    fail = verifyIndex(indexB, num);
    if (fail)
    {
      fail("Index b is invalid");
    }

    Directory merged = new MockRAMDirectory();

    IndexWriter writer = new IndexWriter(merged, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.setMergeFactor(2);

    writer.addIndexes(new Directory[]{indexA, indexB});
    writer.close();

    fail = verifyIndex(merged, 0);
    merged.close();

    assertFalse("The merged index is invalid", fail);
  }

  private boolean verifyIndex(Directory directory, int startAt) throws IOException
  {
    boolean fail = false;
    IndexReader reader = IndexReader.open(directory);

    int max = reader.maxDoc();
    for (int i = 0; i < max; i++)
    {
      Document temp = reader.document(i);
      //System.out.println("doc "+i+"="+temp.getField("count").stringValue());
      // compare the index doc number to the value that it should be
      if (!temp.getField("count").stringValue().equals((i + startAt) + ""))
      {
        fail = true;
        System.out.println("Document " + (i + startAt) + " is returning document " + temp.getField("count").stringValue());
      }
    }
    reader.close();
    return fail;
  }

  private void fillIndex(Directory dir, int start, int numDocs) throws IOException
  {
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.setMergeFactor(2);
    writer.setMaxBufferedDocs(2);

    for (int i = start; i < (start + numDocs); i++)
    {
      Document temp = new Document();
      temp.add(new Field("count", (""+i), Field.Store.YES, Field.Index.UN_TOKENIZED));

      writer.addDocument(temp);
    }
    writer.close();
  }
}
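The property the test verifies, that addIndexes() appends documents in argument order without renumbering, reduces to a short call pattern. A hedged sketch in isolation (the helper class is hypothetical; the constructor and addIndexes(Directory[]) signature are the same ones the test uses):

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;

// Hypothetical sketch, assumed to live in org.apache.lucene.index.
class MergeSketch {
  static Directory merge(Directory indexA, Directory indexB) throws IOException {
    Directory target = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(target, new StandardAnalyzer(), true,
                                    IndexWriter.MaxFieldLength.LIMITED);
    // docIDs keep their relative order: all of indexA first, then all of indexB
    w.addIndexes(new Directory[] { indexA, indexB });
    w.close();
    return target;
  }
}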
@ -1,158 +1,158 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.util.LuceneTestCase;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMDirectory;

/**
 * This testcase tests whether multi-level skipping is being used
 * to reduce I/O while skipping through posting lists.
 *
 * Skipping in general is already covered by several other
 * testcases.
 *
 */
public class TestMultiLevelSkipList extends LuceneTestCase {
  public void testSimpleSkip() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
      Document d1 = new Document();
      d1.add(new Field(term.field(), term.text(), Store.NO, Index.TOKENIZED));
      writer.addDocument(d1);
    }
    writer.flush();
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    SegmentTermPositions tp = (SegmentTermPositions) reader.termPositions();
    tp.freqStream = new CountingStream(tp.freqStream);

    for (int i = 0; i < 2; i++) {
      counter = 0;
      tp.seek(term);

      checkSkipTo(tp, 14, 185);  // no skips
      checkSkipTo(tp, 17, 190);  // one skip on level 0
      checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

      // this test would fail if we had only one skip level,
      // because then more bytes would be read from the freqStream
      checkSkipTo(tp, 4800, 250);// one skip on level 2
    }
  }

  public void checkSkipTo(TermPositions tp, int target, int maxCounter) throws IOException {
    tp.skipTo(target);
    if (maxCounter < counter) {
      fail("Too many bytes read: " + counter);
    }

    assertEquals("Wrong document " + tp.doc() + " after skipTo target " + target, target, tp.doc());
    assertEquals("Frequency is not 1: " + tp.freq(), 1, tp.freq());
    tp.nextPosition();
    byte[] b = new byte[1];
    tp.getPayload(b, 0);
    assertEquals("Wrong payload for the target " + target + ": " + b[0], (byte) target, b[0]);
  }

  private static class PayloadAnalyzer extends Analyzer {
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new PayloadFilter(new LowerCaseTokenizer(reader));
    }

  }

  private static class PayloadFilter extends TokenFilter {
    static int count = 0;

    protected PayloadFilter(TokenStream input) {
      super(input);
    }

    public Token next() throws IOException {
      Token t = input.next();
      if (t != null) {
        t.setPayload(new Payload(new byte[] { (byte) count++ }));
      }
      return t;
    }

  }

  private int counter = 0;

  // Simply extends IndexInput in a way that we are able to count the number
  // of bytes read
  class CountingStream extends IndexInput {
    private IndexInput input;

    CountingStream(IndexInput input) {
      this.input = input;
    }

    public byte readByte() throws IOException {
      TestMultiLevelSkipList.this.counter++;
      return this.input.readByte();
    }

    public void readBytes(byte[] b, int offset, int len) throws IOException {
      TestMultiLevelSkipList.this.counter += len;
      this.input.readBytes(b, offset, len);
    }

    public void close() throws IOException {
      this.input.close();
    }

    public long getFilePointer() {
      return this.input.getFilePointer();
    }

    public void seek(long pos) throws IOException {
      this.input.seek(pos);
    }

    public long length() {
      return this.input.length();
    }

    public Object clone() {
      return new CountingStream((IndexInput) this.input.clone());
    }

  }
}
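Why target 4800 exercises level 2 while 287 only needs levels 0 and 1: each skip level is one skip interval coarser than the level below it. A hedged arithmetic sketch, assuming the 2.x default skip interval of 16 (an assumption; the constant is defined in the index package, not in this test):

// Hypothetical illustration; skipInterval = 16 is an assumed 2.x default.
class SkipLevelMath {
  static void levels() {
    int skipInterval = 16;
    int level0 = skipInterval;            // a skip entry every 16 docs
    int level1 = level0 * skipInterval;   // every 256 docs
    int level2 = level1 * skipInterval;   // every 4096 docs
    // target 287: one level-1 skip (to ~256), then two level-0 skips
    // target 4800: a single level-2 skip covers 4096 docs in one read,
    // which is why the byte budget above barely grows (250 vs. 200)
    System.out.println(level0 + " " + level1 + " " + level2);
  }
}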
File diff suppressed because it is too large
@ -1,115 +1,115 @@
package org.apache.lucene.index;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import java.io.Reader;
import java.io.IOException;
import java.util.Random;

/**
 * @author yonik
 * @version $Id$
 */

class RepeatingTokenStream extends TokenStream {
  public int num;
  Token t;

  public RepeatingTokenStream(String val) {
    t = new Token(val, 0, val.length());
  }

  public Token next() throws IOException {
    return --num < 0 ? null : t;
  }
}


public class TestTermdocPerf extends LuceneTestCase {

  void addDocs(Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
    final Random random = new Random(0);
    final RepeatingTokenStream ts = new RepeatingTokenStream(val);

    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        if (random.nextFloat() < percentDocs) ts.num = random.nextInt(maxTF)+1;
        else ts.num = 0;
        return ts;
      }
    };

    Document doc = new Document();
    doc.add(new Field(field, val, Field.Store.NO, Field.Index.NO_NORMS));
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.setMaxBufferedDocs(100);
    writer.setMergeFactor(100);

    for (int i=0; i<ndocs; i++) {
      writer.addDocument(doc);
    }

    writer.optimize();
    writer.close();
  }


  public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
    Directory dir = new RAMDirectory();

    long start = System.currentTimeMillis();
    addDocs(dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = System.currentTimeMillis();
    System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end-start));

    IndexReader reader = IndexReader.open(dir);
    TermEnum tenum = reader.terms(new Term("foo", "val"));
    TermDocs tdocs = reader.termDocs();

    start = System.currentTimeMillis();

    int ret = 0;
    for (int i=0; i<iter; i++) {
      tdocs.seek(tenum);
      while (tdocs.next()) {
        ret += tdocs.doc();
      }
    }

    end = System.currentTimeMillis();
    System.out.println("milliseconds for " + iter + " TermDocs iterations: " + (end-start));

    return ret;
  }

  public void testTermDocPerf() throws IOException {
    // performance test for 10% of documents containing a term
    // doTest(100000, 10000, 3, .1f);
  }

}
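testTermDocPerf is deliberately a no-op (the doTest call is commented out) so ordinary test runs stay fast. To actually take the measurement, one could add a hypothetical variant such as the method below to the class above, using the same parameters the comment suggests:

// Hypothetical method, not part of this commit.
public void testTermDocPerfEnabled() throws IOException {
  // 100000 seek+iterate passes over 10000 docs, roughly 10% of which
  // contain "val" with a term frequency between 1 and maxTF = 3
  int checksum = doTest(100000, 10000, 3, 0.1f);
  System.out.println("checksum=" + checksum); // use the result so the loop isn't dead code
}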
@ -1,284 +1,284 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2005 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Calendar;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestCase;
|
||||
import junit.framework.TestSuite;
|
||||
import junit.textui.TestRunner;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.DateTools;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
/**
|
||||
* Unit test for sorting code.
|
||||
*
|
||||
* @author Martin Seitz (T-Systems)
|
||||
*/
|
||||
|
||||
public class TestCustomSearcherSort
|
||||
extends TestCase
|
||||
implements Serializable {
|
||||
|
||||
private Directory index = null;
|
||||
private Query query = null;
|
||||
// reduced from 20000 to 2000 to speed up test...
|
||||
private final static int INDEX_SIZE = 2000;
|
||||
|
||||
public TestCustomSearcherSort (String name) {
|
||||
super (name);
|
||||
}
|
||||
|
||||
public static void main (String[] argv) {
|
||||
TestRunner.run (suite());
|
||||
}
|
||||
|
||||
public static Test suite() {
|
||||
return new TestSuite (TestCustomSearcherSort.class);
|
||||
}
|
||||
|
||||
|
||||
// create an index for testing
|
||||
private Directory getIndex()
|
||||
throws IOException {
|
||||
RAMDirectory indexStore = new RAMDirectory ();
|
||||
IndexWriter writer = new IndexWriter (indexStore, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
RandomGen random = new RandomGen();
|
||||
for (int i=0; i<INDEX_SIZE; ++i) { // don't decrease; if to low the problem doesn't show up
|
||||
Document doc = new Document();
|
||||
if((i%5)!=0) { // some documents must not have an entry in the first sort field
|
||||
doc.add (new Field("publicationDate_", random.getLuceneDate(), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
}
|
||||
if((i%7)==0) { // some documents to match the query (see below)
|
||||
doc.add (new Field("content", "test", Field.Store.YES, Field.Index.TOKENIZED));
|
||||
}
|
||||
// every document has a defined 'mandant' field
|
||||
doc.add(new Field("mandant", Integer.toString(i%3), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
writer.addDocument (doc);
|
||||
}
|
||||
writer.optimize ();
|
||||
writer.close ();
|
||||
return indexStore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create index and query for test cases.
|
||||
*/
|
||||
public void setUp() throws Exception {
|
||||
index = getIndex();
|
||||
query = new TermQuery( new Term("content", "test"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the test using two CustomSearcher instances.
|
||||
*/
|
||||
public void testFieldSortCustomSearcher() throws Exception {
|
||||
// log("Run testFieldSortCustomSearcher");
|
||||
// define the sort criteria
|
||||
Sort custSort = new Sort(new SortField[] {
|
||||
new SortField("publicationDate_"),
|
||||
SortField.FIELD_SCORE
|
||||
});
|
||||
Searcher searcher = new CustomSearcher (index, 2);
|
||||
// search and check hits
|
||||
matchHits(searcher, custSort);
|
||||
}
|
||||
/**
|
||||
* Run the test using one CustomSearcher wrapped by a MultiSearcher.
|
||||
*/
|
||||
public void testFieldSortSingleSearcher() throws Exception {
|
||||
// log("Run testFieldSortSingleSearcher");
|
||||
// define the sort criteria
|
||||
Sort custSort = new Sort(new SortField[] {
|
||||
new SortField("publicationDate_"),
|
||||
SortField.FIELD_SCORE
|
||||
});
|
||||
Searcher searcher =
|
||||
new MultiSearcher(new Searchable[] {
|
||||
new CustomSearcher (index, 2)});
|
||||
// search and check hits
|
||||
matchHits(searcher, custSort);
|
||||
}
|
||||
/**
|
||||
* Run the test using two CustomSearcher instances.
|
||||
*/
|
||||
public void testFieldSortMultiCustomSearcher() throws Exception {
|
||||
// log("Run testFieldSortMultiCustomSearcher");
|
||||
// define the sort criteria
|
||||
Sort custSort = new Sort(new SortField[] {
|
||||
new SortField("publicationDate_"),
|
||||
SortField.FIELD_SCORE
|
||||
});
|
||||
Searcher searcher =
|
||||
new MultiSearcher(new Searchable[] {
|
||||
new CustomSearcher (index, 0),
|
||||
new CustomSearcher (index, 2)});
|
||||
// search and check hits
|
||||
matchHits(searcher, custSort);
|
||||
}
|
||||
|
||||
|
||||
// make sure the documents returned by the search match the expected list
|
||||
private void matchHits (Searcher searcher, Sort sort)
|
||||
throws IOException {
|
||||
// make a query without sorting first
|
||||
Hits hitsByRank = searcher.search(query);
|
||||
checkHits(hitsByRank, "Sort by rank: "); // check for duplicates
|
||||
Map resultMap = new TreeMap();
|
||||
// store hits in TreeMap - TreeMap does not allow duplicates; existing entries are silently overwritten
|
||||
for(int hitid=0;hitid<hitsByRank.length(); ++hitid) {
|
||||
resultMap.put(
|
||||
new Integer(hitsByRank.id(hitid)), // Key: Lucene Document ID
|
||||
new Integer(hitid)); // Value: Hits-Objekt Index
|
||||
}
|
||||
|
||||
// now make a query using the sort criteria
|
||||
Hits resultSort = searcher.search (query, sort);
|
||||
checkHits(resultSort, "Sort by custom criteria: "); // check for duplicates
|
||||
|
||||
String lf = System.getProperty("line.separator", "\n");
|
||||
// besides the sorting both sets of hits must be identical
|
||||
for(int hitid=0;hitid<resultSort.length(); ++hitid) {
|
||||
Integer idHitDate = new Integer(resultSort.id(hitid)); // document ID from sorted search
|
||||
if(!resultMap.containsKey(idHitDate)) {
|
||||
log("ID "+idHitDate+" not found. Possibliy a duplicate.");
|
||||
}
|
||||
assertTrue(resultMap.containsKey(idHitDate)); // same ID must be in the Map from the rank-sorted search
|
||||
// every hit must appear once in both result sets --> remove it from the Map.
|
||||
// At the end the Map must be empty!
|
||||
resultMap.remove(idHitDate);
|
||||
}
|
||||
if(resultMap.size()==0) {
|
||||
// log("All hits matched");
|
||||
} else {
|
||||
log("Couldn't match "+resultMap.size()+" hits.");
|
||||
}
|
||||
assertEquals(resultMap.size(), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the hits for duplicates.
|
||||
* @param hits
|
||||
*/
|
||||
private void checkHits(Hits hits, String prefix) {
|
||||
if(hits!=null) {
|
||||
Map idMap = new TreeMap();
|
||||
for(int docnum=0;docnum<hits.length();++docnum) {
|
||||
Integer luceneId = null;
|
||||
try {
|
||||
luceneId = new Integer(hits.id(docnum));
|
||||
if(idMap.containsKey(luceneId)) {
|
||||
StringBuffer message = new StringBuffer(prefix);
|
||||
message.append("Duplicate key for hit index = ");
|
||||
message.append(docnum);
|
||||
message.append(", previous index = ");
|
||||
message.append(((Integer)idMap.get(luceneId)).toString());
|
||||
message.append(", Lucene ID = ");
|
||||
message.append(luceneId);
|
||||
log(message.toString());
|
||||
} else {
|
||||
idMap.put(luceneId, new Integer(docnum));
|
||||
}
|
||||
} catch(IOException ioe) {
|
||||
StringBuffer message = new StringBuffer(prefix);
|
||||
message.append("Error occurred for hit index = ");
|
||||
message.append(docnum);
|
||||
message.append(" (");
|
||||
message.append(ioe.getMessage());
|
||||
message.append(")");
|
||||
log(message.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Simply write to console - choosen to be independant of log4j etc
|
||||
private void log(String message) {
|
||||
System.out.println(message);
|
||||
}
|
||||
|
||||
    public class CustomSearcher extends IndexSearcher {
        private int switcher;
        /**
         * @param directory the index directory
         * @param switcher the "mandant" value this searcher is restricted to
         * @throws IOException
         */
        public CustomSearcher(Directory directory, int switcher) throws IOException {
            super(directory);
            this.switcher = switcher;
        }
        /**
         * @param r the index reader
         * @param switcher the "mandant" value this searcher is restricted to
         */
        public CustomSearcher(IndexReader r, int switcher) {
            super(r);
            this.switcher = switcher;
        }
        /**
         * @param path path to the index
         * @param switcher the "mandant" value this searcher is restricted to
         * @throws IOException
         */
        public CustomSearcher(String path, int switcher) throws IOException {
            super(path);
            this.switcher = switcher;
        }
        /* (non-Javadoc)
         * @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, org.apache.lucene.search.Filter, int, org.apache.lucene.search.Sort)
         */
        public TopFieldDocs search(Query query, Filter filter, int nDocs,
                Sort sort) throws IOException {
            // restrict the incoming query to the documents of this "mandant"
            BooleanQuery bq = new BooleanQuery();
            bq.add(query, BooleanClause.Occur.MUST);
            bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), BooleanClause.Occur.MUST);
            return super.search(bq, filter, nDocs, sort);
        }
        /* (non-Javadoc)
         * @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, org.apache.lucene.search.Filter, int)
         */
        public TopDocs search(Query query, Filter filter, int nDocs)
                throws IOException {
            // restrict the incoming query to the documents of this "mandant"
            BooleanQuery bq = new BooleanQuery();
            bq.add(query, BooleanClause.Occur.MUST);
            bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), BooleanClause.Occur.MUST);
            return super.search(bq, filter, nDocs);
        }
    }

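    // Usage sketch (illustrative, not part of the test): every query issued
    // through a CustomSearcher is silently narrowed to one "mandant", because
    // Hits fetches its documents through the overridden
    // search(Query, Filter, int) method above:
    //
    //   Searcher s = new CustomSearcher(index, 2);
    //   Hits hits = s.search(new TermQuery(new Term("content", "test")));
    //   // hits now only contains documents whose "mandant" field is "2"
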
    private class RandomGen {
        private Random random = new Random(0); // fixed seed, so the arbitrary contents are reproducible
        private Calendar base = new GregorianCalendar(1980, 1, 1); // month is 0-based, so this is Feb 1, 1980

        // Just to generate some different Lucene Date strings.
        // The expression below is evaluated in long arithmetic, so subtracting
        // Integer.MIN_VALUE turns the random int into a non-negative offset
        // (0 .. 2^32-1 ms, i.e. up to roughly 49 days after the base date).
        private String getLuceneDate() {
            return DateTools.timeToString(base.getTimeInMillis() + random.nextInt() - Integer.MIN_VALUE, DateTools.Resolution.DAY);
        }
    }
}