LUCENE-3892: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1374578 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-08-18 13:35:11 +00:00
commit bfcd96c689
169 changed files with 3911 additions and 720 deletions

View File

@ -145,21 +145,11 @@
<classpathentry kind="lib" path="solr/contrib/extraction/lib/bcmail-jdk15-1.45.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/bcprov-jdk15-1.45.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/boilerpipe-1.1.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/dom4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/metadata-extractor-2.4.0-beta-1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/netcdf-4.2-min.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/rome-0.9.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tagsoup-1.2.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-1.1-20120112.jar"/>
<classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.7.jar"/>
@ -175,5 +165,25 @@
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-core-0.7.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-dom-0.7.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.4.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/icu4j-49.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/isoparser-1.0-RC-1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jdom-1.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/juniversalchardet-1.0.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-core-0.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-tika-0.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xercesImpl-2.9.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xz-1.0.jar"/>
<classpathentry kind="output" path="bin/other"/>
</classpath>

View File

@ -75,6 +75,14 @@ Bug Fixes
encoders / stemmers via the ResourceLoader now instead of Class.forName().
Solr users should no longer have to embed these in the war. (David Smiley)
* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
* LUCENE-4310: MappingCharFilter was failing to match input strings
containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir,
Mike McCandless)
Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for

View File

@ -111,9 +111,8 @@ public class NormalizeCharMap {
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRef scratch = new IntsRef();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
builder.add(Util.toUTF32(ent.getKey(), scratch),
builder.add(Util.toUTF16(ent.getKey(), scratch),
new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
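This one-line change is the core of the LUCENE-4310 fix listed in CHANGES above: the FST is built with FST.INPUT_TYPE.BYTE2, i.e. keyed by UTF-16 code units, while Util.toUTF32 added keys as whole codepoints, so any mapping key containing a surrogate pair could never match the char-by-char lookup MappingCharFilter performs. A minimal sketch of the now-working path, assuming the 4.0 NormalizeCharMap.Builder API that the test below uses (U+1D122, the musical F clef, is a non-BMP character):

import java.io.StringReader;

import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

public class NonBmpMappingSketch {
  public static void main(String[] args) throws Exception {
    // U+1D122 is one codepoint but two Java chars (a surrogate pair).
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("\uD834\uDD22", "fclef");
    NormalizeCharMap normMap = builder.build();

    // MappingCharFilter feeds the FST one UTF-16 unit at a time, so the
    // keys must be stored as UTF-16 units too (toUTF16, not toUTF32).
    CharFilter cs = new MappingCharFilter(normMap, new StringReader("\uD834\uDD22"));
    char[] buf = new char[16];
    int len = cs.read(buf, 0, buf.length);
    System.out.println(new String(buf, 0, len)); // "fclef" after this fix
  }
}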

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@ -55,6 +56,11 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
builder.add( "empty", "" );
// non-BMP (surrogate pair):
builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
builder.add("\uff01", "full-width-exclamation");
normMap = builder.build();
}
@ -128,6 +134,18 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
}
public void testNonBMPChar() throws Exception {
CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
}
public void testFullWidthChar() throws Exception {
CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"full-width-exclamation"}, new int[]{0}, new int[]{1}, 1);
}
//
// 1111111111222
// 01234567890123456789012

View File

@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
return DefaultsHolder.DEFAULT_STOP_SET;
}
/**
* Returns an unmodifiable instance of the default stemmer table.
*/
public static Trie getDefaultTable() {
return DefaultsHolder.DEFAULT_TABLE;
}
/**
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
* accesses the static final set the first time.
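getDefaultTable() is what makes the SOLR-3737 fix work: the large stemmer Trie is loaded lazily, once, and shared by every caller. A hedged sketch of direct use, mirroring what the rewritten StempelPolishStemFilterFactory in the next hunk now does:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;

class PolishStemmingSketch {
  // Every caller shares the one lazily loaded table; no per-instance copy in RAM.
  TokenStream stem(TokenStream input) {
    return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
  }
}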

View File

@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;
/**
* Factory for {@link StempelFilter} using a Polish stemming table.
*/
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private Trie stemmer = null;
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
public class StempelPolishStemFilterFactory extends TokenFilterFactory {
public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(stemmer));
}
public void inform(ResourceLoader loader) throws IOException {
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}
}

View File

@ -332,7 +332,7 @@ public class Trie {
* @param key the key
* @param cmd the patch command
*/
public void add(CharSequence key, CharSequence cmd) {
void add(CharSequence key, CharSequence cmd) {
if (key == null || cmd == null) {
return;
}

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/**
* Tests for {@link StempelPolishStemFilterFactory}
@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts,
new String[] { "student", "student" });

View File

@ -234,10 +234,10 @@
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
<check-missing-javadocs dir="build/docs/benchmark" level="class"/>
<!-- core: problems -->
<check-missing-javadocs dir="build/docs/core" level="class"/>
<check-missing-javadocs dir="build/docs/demo" level="class"/>
<check-missing-javadocs dir="build/docs/facet" level="class"/>
<!-- grouping: problems -->
<check-missing-javadocs dir="build/docs/grouping" level="class"/>
<!-- highlighter: problems -->
<check-missing-javadocs dir="build/docs/join" level="class"/>
<check-missing-javadocs dir="build/docs/memory" level="class"/>
@ -247,7 +247,7 @@
<check-missing-javadocs dir="build/docs/sandbox" level="class"/>
<!-- spatial: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
<!-- test-framework: problems -->
<check-missing-javadocs dir="build/docs/test-framework" level="class"/>
</sequential>
</target>

View File

@ -26,11 +26,15 @@ import org.apache.lucene.index.TermState;
* terms dict.
*/
public class BlockTermState extends OrdTermState {
public int docFreq; // how many docs have this term
public long totalTermFreq; // total number of occurrences of this term
/** how many docs have this term */
public int docFreq;
/** total number of occurrences of this term */
public long totalTermFreq;
public int termBlockOrd; // the term's ord in the current block
public long blockFilePointer; // fp into the terms dict primary file (_X.tim) that holds this term
/** the term's ord in the current block */
public int termBlockOrd;
/** fp into the terms dict primary file (_X.tim) that holds this term */
public long blockFilePointer;
@Override
public void copyFrom(TermState _other) {

View File

@ -36,7 +36,7 @@ import org.apache.lucene.util.MathUtil;
*/
public abstract class MultiLevelSkipListReader {
// the maximum number of skip levels possible for this index
/** the maximum number of skip levels possible for this index */
protected int maxNumberOfSkipLevels;
// number of levels in this skip list

View File

@ -52,7 +52,7 @@ import org.apache.lucene.util.MathUtil;
*/
public abstract class MultiLevelSkipListWriter {
// number of levels in this skip list
/** number of levels in this skip list */
protected int numberOfSkipLevels;
// the skip interval in the list with level = 0
@ -93,8 +93,8 @@ public abstract class MultiLevelSkipListWriter {
}
}
/** creates new buffers or empties the existing ones */
protected void resetSkip() {
// creates new buffers or empties the existing ones
if (skipBuffer == null) {
init();
} else {

View File

@ -1796,7 +1796,7 @@ public class DirectPostingsFormat extends PostingsFormat {
}
// Docs + freqs:
public final static class HighFreqDocsEnum extends DocsEnum {
private final static class HighFreqDocsEnum extends DocsEnum {
private int[] docIDs;
private int[] freqs;
private final Bits liveDocs;
@ -1969,7 +1969,7 @@ public class DirectPostingsFormat extends PostingsFormat {
}
// TODO: specialize offsets and not
public final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
private int[] docIDs;
private int[] freqs;
private int[][] positions;

View File

@ -36,7 +36,7 @@ public abstract class IntIndexInput implements Closeable {
public abstract Index index() throws IOException;
// TODO: -- can we simplify this?
/** Records a single skip-point in the {@link IntIndexInput.Reader}. */
public abstract static class Index {
public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
@ -50,6 +50,7 @@ public abstract class IntIndexInput implements Closeable {
public abstract Index clone();
}
/** Reads int values. */
public abstract static class Reader {
/** Reads next single int */

View File

@ -38,6 +38,7 @@ public abstract class IntIndexOutput implements Closeable {
* >= 0. */
public abstract void write(int v) throws IOException;
/** Records a single skip-point in the IndexOutput. */
public abstract static class Index {
/** Internally records the current location */

View File

@ -22,8 +22,15 @@ import org.apache.lucene.store.IOContext;
import java.io.IOException;
/** @lucene.experimental */
/** Provides int reader and writer to specified files.
*
* @lucene.experimental */
public abstract class IntStreamFactory {
/** Create an {@link IntIndexInput} on the provided
* fileName. */
public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException;
/** Create an {@link IntIndexOutput} on the provided
* fileName. */
public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException;
}
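IntStreamFactory is the extension point the sep postings layer uses to obtain its int readers and writers; the mock factories later in this commit (MockSingleIntFactory, MockIntFactory) are concrete implementations. A hedged sketch of consuming a factory, with an illustrative file name; the reader()/next() accessors are assumed from this release's IntIndexInput API documented above:

import java.io.IOException;

import org.apache.lucene.codecs.sep.IntIndexInput;
import org.apache.lucene.codecs.sep.IntIndexOutput;
import org.apache.lucene.codecs.sep.IntStreamFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

class IntStreamRoundTrip {
  // Write one int and read it back through whatever encoding the factory picks.
  static int roundTrip(IntStreamFactory factory, Directory dir, IOContext ctx) throws IOException {
    IntIndexOutput out = factory.createOutput(dir, "_0.doc", ctx); // file name is illustrative
    out.write(42);
    out.close();

    IntIndexInput in = factory.openInput(dir, "_0.doc", ctx);
    int v = in.reader().next();
    in.close();
    return v; // 42
  }
}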

View File

@ -119,10 +119,13 @@ public class DocTermOrds {
protected final String field;
protected int numTermsInField;
protected long termInstances; // total number of references to term numbers
/** total number of references to term numbers */
protected long termInstances;
private long memsz;
protected int total_time; // total time to uninvert the field
protected int phase1_time; // time for phase1 of the uninvert process
/** total time to uninvert the field */
protected int total_time;
/** time for phase1 of the uninvert process */
protected int phase1_time;
protected int[] index;
protected byte[][] tnums = new byte[256][];
@ -234,7 +237,7 @@ public class DocTermOrds {
protected void setActualDocFreq(int termNum, int df) throws IOException {
}
// Call this only once (if you subclass!)
/** Call this only once (if you subclass!) */
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.currentTimeMillis();

View File

@ -267,11 +267,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
}
// NOTE: this method does not carry over termVector
// booleans nor docValuesType; the indexer chain
// (TermVectorsConsumerPerField, DocFieldProcessor) must
// set these fields when they succeed in consuming
// the document:
/** NOTE: this method does not carry over termVector
* booleans nor docValuesType; the indexer chain
* (TermVectorsConsumerPerField, DocFieldProcessor) must
* set these fields when they succeed in consuming
* the document */
public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) {
// TODO: really, indexer shouldn't even call this
// method (it's only called from DocFieldProcessor);

View File

@ -243,6 +243,10 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
}
}
/** Thrown when a merge was explicitly aborted because
* {@link IndexWriter#close(boolean)} was called with
* <code>false</code>. Normally this exception is
* privately caught and suppressed by {@link IndexWriter}. */
public static class MergeAbortedException extends IOException {
public MergeAbortedException() {
super("merge is aborted");

View File

@ -29,6 +29,9 @@ import org.apache.lucene.util.packed.PackedInts;
* @lucene.experimental */
public class MergeState {
/**
* Remaps docids around deletes during merge
*/
public static abstract class DocMap {
private final Bits liveDocs;
@ -197,6 +200,9 @@ public class MergeState {
public SegmentReader[] matchingSegmentReaders;
public int matchedCount;
/**
* Class for recording units of work when merging segments.
*/
public static class CheckAbort {
private double workCount;
private final MergePolicy.OneMerge merge;

View File

@ -43,7 +43,7 @@ import org.apache.lucene.util.packed.PackedInts.Reader;
* @lucene.experimental
* @lucene.internal
*/
public class MultiDocValues extends DocValues {
class MultiDocValues extends DocValues {
private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller();
private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() {

View File

@ -143,6 +143,8 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
// TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsAndPositionsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
public DocsAndPositionsEnum docsAndPositionsEnum;
public ReaderSlice slice;

View File

@ -122,6 +122,8 @@ public final class MultiDocsEnum extends DocsEnum {
}
// TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
public DocsEnum docsEnum;
public ReaderSlice slice;

View File

@ -133,7 +133,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
public Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
/** Opaque Map&lt;String, String&gt; that user can specify during IndexWriter.commit */
public Map<String,String> userData = Collections.<String,String>emptyMap();
private List<SegmentInfoPerCommit> segments = new ArrayList<SegmentInfoPerCommit>();

View File

@ -30,11 +30,11 @@ public class SegmentReadState {
public final FieldInfos fieldInfos;
public final IOContext context;
// NOTE: if this is < 0, that means "defer terms index
// load until needed". But if the codec must load the
// terms index on init (preflex is the only once currently
// that must do so), then it should negate this value to
// get the app's terms divisor:
/** NOTE: if this is &lt; 0, that means "defer terms index
* load until needed". But if the codec must load the
* terms index on init (preflex is the only one currently
* that must do so), then it should negate this value to
* get the app's terms divisor */
public int termsIndexDivisor;
public final String segmentSuffix;

View File

@ -33,11 +33,11 @@ public class SegmentWriteState {
public final FieldInfos fieldInfos;
public int delCountOnFlush;
// Deletes to apply while we are flushing the segment. A
// Term is enrolled in here if it was deleted at one
// point, and it's mapped to the docIDUpto, meaning any
// docID < docIDUpto containing this term should be
// deleted.
/** Deletes to apply while we are flushing the segment. A
* Term is enrolled in here if it was deleted at one
* point, and it's mapped to the docIDUpto, meaning any
* docID &lt; docIDUpto containing this term should be
* deleted. */
public final BufferedDeletes segDeletes;
// Lazily created:

View File

@ -32,6 +32,9 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.packed.PackedInts;
/**
* Utility class for merging SortedBytes DocValues
* instances.
*
* @lucene.internal
*/
public final class SortedBytesMergeUtils {
@ -54,7 +57,14 @@ public final class SortedBytesMergeUtils {
}
return new MergeContext(comp, mergeDocCount, size, type);
}
/**
* Encapsulates contextual information about the merge.
* This class holds document id to ordinal mappings, offsets for
* variable length values and the comparator to sort the merged
* bytes.
*
* @lucene.internal
*/
public static final class MergeContext {
private final Comparator<BytesRef> comp;
private final BytesRef missingValue = new BytesRef();
@ -169,10 +179,36 @@ public final class SortedBytesMergeUtils {
return merger.currentOrd;
}
/**
* Implementations of this interface consume the merged bytes with their
* corresponding ordinal and byte offset. The offset is the byte offset in
* the target sorted source where the currently merged {@link BytesRef}
* instance should be stored.
*/
public static interface BytesRefConsumer {
/**
* Consumes a single {@link BytesRef}. The provided {@link BytesRef}
* instances are strictly increasing with respect to the
* {@link Comparator} used for merging.
*
* @param ref
* the {@link BytesRef} to consume
* @param ord
* the ordinal of the given {@link BytesRef} in the merge target
* @param offset
* the byte offset of the given {@link BytesRef} in the merge
* target
* @throws IOException
* if an {@link IOException} occurs
*/
public void consume(BytesRef ref, int ord, long offset) throws IOException;
}
/**
* A simple {@link BytesRefConsumer} that writes the merged {@link BytesRef}
* instances sequentially to an {@link IndexOutput}.
*/
public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
private final IndexOutput datOut;
@ -187,6 +223,14 @@ public final class SortedBytesMergeUtils {
}
}
/**
* {@link RecordMerger} merges a list of {@link SortedSourceSlice} lazily by
* consuming the sorted source records one by one and de-duplicates records
* that are shared across slices. The algorithm is based on a lazy priority queue
* that prevents reading merge sources into heap memory.
*
* @lucene.internal
*/
private static final class RecordMerger {
private final MergeQueue queue;
private final SortedSourceSlice[] top;
@ -231,6 +275,12 @@ public final class SortedBytesMergeUtils {
}
}
/**
* {@link SortedSourceSlice} represents a single {@link SortedSource} merge candidate.
* It encapsulates ordinal and pre-calculated target doc id to ordinal mappings.
* This class also holds state private to the merge process.
* @lucene.internal
*/
public static class SortedSourceSlice {
final SortedSource source;
final int readerIdx;
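The new BytesRefConsumer interface decouples the merge loop from where the merged, deduplicated values end up; IndexOutputBytesRefConsumer above is the stock implementation. A hedged sketch of a custom consumer that just collects the values (the package of SortedBytesMergeUtils is assumed from this commit's tree):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.SortedBytesMergeUtils.BytesRefConsumer;
import org.apache.lucene.util.BytesRef;

class CollectingConsumer implements BytesRefConsumer {
  // Values arrive in strictly increasing order per the merge comparator;
  // 'offset' is where each value would start in the merged sorted source.
  final List<BytesRef> values = new ArrayList<BytesRef>();

  @Override
  public void consume(BytesRef ref, int ord, long offset) throws IOException {
    values.add(BytesRef.deepCopyOf(ref)); // the merger may reuse 'ref', so copy it
  }
}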

View File

@ -45,6 +45,9 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
*/
public static final class CreationPlaceholder {
Object value;
}

View File

@ -194,6 +194,9 @@ public abstract class FieldComparator<T> {
* than the provided value. */
public abstract int compareDocToValue(int doc, T value) throws IOException;
/**
* Base FieldComparator class for numeric types
*/
public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
protected final T missingValue;
protected final String field;

View File

@ -33,6 +33,10 @@ import org.apache.lucene.util.PriorityQueue;
*/
public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {
/**
* Extension of ScoreDoc to also store the
* {@link FieldComparator} slot.
*/
public static class Entry extends ScoreDoc {
public int slot;

View File

@ -398,12 +398,17 @@ public class FuzzyTermsEnum extends TermsEnum {
return scale_factor;
}
/** @lucene.internal */
/**
* reuses compiled automata across different segments,
* because they are independent of the index
* @lucene.internal */
public static interface LevenshteinAutomataAttribute extends Attribute {
public List<CompiledAutomaton> automata();
}
/** @lucene.internal */
/**
* Stores compiled automata as a list (indexed by edit distance)
* @lucene.internal */
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
private final List<CompiledAutomaton> automata = new ArrayList<CompiledAutomaton>();

View File

@ -82,7 +82,7 @@ public class IndexSearcher {
// in the next release
protected final IndexReaderContext readerContext;
protected final List<AtomicReaderContext> leafContexts;
// used with executor - each slice holds a set of leafs executed within one thread
/** used with executor - each slice holds a set of leafs executed within one thread */
protected final LeafSlice[] leafSlices;
// These are only used for multi-threaded search

View File

@ -32,7 +32,11 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
/** @lucene.internal Only public to be accessible by spans package. */
/**
* Base rewrite method that translates each term into a query, and keeps
* the scores as computed by the query.
* <p>
* @lucene.internal Only public to be accessible by spans package. */
public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q> {
/** A rewrite method that first translates each term into

View File

@ -38,6 +38,9 @@ import org.apache.lucene.util.StringHelper;
*/
public class SortField {
/**
* Specifies the type of the terms to be sorted, or special types such as CUSTOM
*/
public static enum Type {
/** Sort by document score (relevance). Sort values are Float and higher

View File

@ -225,6 +225,8 @@ public class TimeLimitingCollector extends Collector {
}
/**
* Thread used to timeout search requests.
* Can be stopped completely with {@link TimerThread#stopTimer()}
* @lucene.experimental
*/
public static final class TimerThread extends Thread {

View File

@ -33,8 +33,8 @@ import org.apache.lucene.util.PriorityQueue;
*/
public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector {
// This is used in case topDocs() is called with illegal parameters, or there
// simply aren't (enough) results.
/** This is used in case topDocs() is called with illegal parameters, or there
* simply aren't (enough) results. */
protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN);
/**

View File

@ -436,6 +436,9 @@ public abstract class FSDirectory extends Directory {
return chunkSize;
}
/**
* Writes output with {@link RandomAccessFile#write(byte[], int, int)}
*/
protected static class FSIndexOutput extends BufferedIndexOutput {
private final FSDirectory parent;
private final String name;

View File

@ -106,6 +106,9 @@ public class NIOFSDirectory extends FSDirectory {
};
}
/**
* Reads bytes with {@link FileChannel#read(ByteBuffer, long)}
*/
protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
private ByteBuffer byteBuf; // wraps the buffer for NIO

View File

@ -19,7 +19,9 @@ package org.apache.lucene.store;
import java.util.ArrayList;
/** @lucene.internal */
/**
* Represents a file in RAM as a list of byte[] buffers.
* @lucene.internal */
public class RAMFile {
protected ArrayList<byte[]> buffers = new ArrayList<byte[]>();
long length;

View File

@ -85,8 +85,16 @@ public class SimpleFSDirectory extends FSDirectory {
};
}
/**
* Reads bytes with {@link RandomAccessFile#seek(long)} followed by
* {@link RandomAccessFile#read(byte[], int, int)}.
*/
protected static class SimpleFSIndexInput extends BufferedIndexInput {
/**
* Extension of RandomAccessFile that tracks if the file is
* open.
*/
protected static class Descriptor extends RandomAccessFile {
// remember if the file is open, so that we don't try to close it
// more than once

View File

@ -117,10 +117,13 @@ public final class ByteBlockPool {
public byte[][] buffers = new byte[10][];
int bufferUpto = -1; // Which buffer we are upto
public int byteUpto = BYTE_BLOCK_SIZE; // Where we are in head buffer
/** Where we are in head buffer */
public int byteUpto = BYTE_BLOCK_SIZE;
public byte[] buffer; // Current head buffer
public int byteOffset = -BYTE_BLOCK_SIZE; // Current head offset
/** Current head buffer */
public byte[] buffer;
/** Current head offset */
public int byteOffset = -BYTE_BLOCK_SIZE;
private final Allocator allocator;

View File

@ -48,6 +48,11 @@ public class FuzzySet {
public static final int FUZZY_SERIALIZATION_VERSION=1;
/**
* Result from {@link FuzzySet#contains(BytesRef)}:
* can never return definitively YES (always MAYBE),
* but can sometimes definitely return NO.
*/
public enum ContainsResult {
MAYBE, NO
};
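ContainsResult spells out the Bloom-filter contract: the set can prove a value was never added (NO) but can only suggest presence (MAYBE). A hedged sketch of the short-circuit a consumer would perform before an expensive terms-dictionary lookup; FuzzySet's package and the exact contains() signature are assumed from the bloom codec module this hunk belongs to:

import java.io.IOException;

import org.apache.lucene.codecs.bloom.FuzzySet;
import org.apache.lucene.util.BytesRef;

class BloomGate {
  // Returns false only when the term is definitely absent from the index.
  static boolean maybePresent(FuzzySet bloom, BytesRef term) throws IOException {
    return bloom.contains(term) != FuzzySet.ContainsResult.NO;
  }
}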

View File

@ -158,7 +158,7 @@ public final class FST<T> {
private final boolean packed;
private PackedInts.Reader nodeRefToAddress;
// If arc has this label then that arc is final/accepted
/** If arc has this label then that arc is final/accepted */
public static final int END_LABEL = -1;
private boolean allowArrayArcs = true;
@ -174,7 +174,7 @@ public final class FST<T> {
// building an FST w/ willPackFST=true:
int node;
// To node (ord or address):
/** To node (ord or address) */
public int target;
byte flags;
@ -542,8 +542,8 @@ public final class FST<T> {
return v;
}
// returns true if the node at this address has any
// outgoing arcs
/** returns true if the node at this address has any
* outgoing arcs */
public static<T> boolean targetHasArcs(Arc<T> arc) {
return arc.target > 0;
}

View File

@ -767,6 +767,19 @@ public final class Util {
}
}
/** Just maps each UTF16 unit (char) to the ints in an
* IntsRef. */
public static IntsRef toUTF16(CharSequence s, IntsRef scratch) {
final int charLimit = s.length();
scratch.offset = 0;
scratch.length = charLimit;
scratch.grow(charLimit);
for (int idx = 0; idx < charLimit; idx++) {
scratch.ints[idx] = (int) s.charAt(idx);
}
return scratch;
}
/** Decodes the Unicode codepoints from the provided
* CharSequence and places them in the provided scratch
* IntsRef, which must not be null, returning it. */
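The new toUTF16 above complements the existing toUTF32, whose javadoc closes this hunk, and is what the NormalizeCharMap fix earlier in this commit switches to. The two only differ for non-BMP input: toUTF32 collapses a surrogate pair into one codepoint, while toUTF16 keeps both code units. A small sketch:

import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Util;

class Utf16VsUtf32 {
  static void demo() {
    String fclef = "\uD834\uDD22"; // U+1D122 as a surrogate pair

    IntsRef utf32 = Util.toUTF32(fclef, new IntsRef()); // length == 1: { 0x1D122 }
    IntsRef utf16 = Util.toUTF16(fclef, new IntsRef()); // length == 2: { 0xD834, 0xDD22 }
  }
}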

View File

@ -734,7 +734,7 @@ public class PackedInts {
}
return new Packed64(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknwown Writer format: " + format);
throw new AssertionError("Unknown Writer format: " + format);
}
}

View File

@ -20,12 +20,13 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.Norm;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
@ -36,7 +37,7 @@ import org.apache.lucene.util.LuceneTestCase;
public class TestSimilarityProvider extends LuceneTestCase {
private Directory directory;
private IndexReader reader;
private DirectoryReader reader;
private IndexSearcher searcher;
@Override
@ -75,8 +76,9 @@ public class TestSimilarityProvider extends LuceneTestCase {
public void testBasics() throws Exception {
// sanity check of norms writer
// TODO: generalize
byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray();
AtomicReader slow = new SlowCompositeReaderWrapper(reader);
byte fooNorms[] = (byte[]) slow.normValues("foo").getSource().getArray();
byte barNorms[] = (byte[]) slow.normValues("bar").getSource().getArray();
for (int i = 0; i < fooNorms.length; i++) {
assertFalse(fooNorms[i] == barNorms[i]);
}
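The rewrite above is needed because this commit makes MultiDocValues package-private (see the earlier MultiDocValues.java hunk); tests now obtain a composite view of per-segment norms through SlowCompositeReaderWrapper instead. The pattern, as a hedged helper:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;

class NormsHelper {
  // Merges all segments behind one AtomicReader view. Fine for tests;
  // as the class name warns, too slow for production code.
  static byte[] norms(DirectoryReader reader, String field) throws IOException {
    AtomicReader slow = new SlowCompositeReaderWrapper(reader);
    return (byte[]) slow.normValues(field).getSource().getArray();
  }
}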

View File

@ -0,0 +1,49 @@
package org.apache.lucene.util.junitcompat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import org.apache.lucene.util._TestUtil;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
public class TestLeaveFilesIfTestFails extends WithNestedTests {
public TestLeaveFilesIfTestFails() {
super(true);
}
public static class Nested1 extends WithNestedTests.AbstractNestedTest {
static File file;
public void testDummy() {
file = _TestUtil.getTempDir("leftover");
file.mkdirs();
fail();
}
}
@Test
public void testLeaveFilesIfTestFails() {
Result r = JUnitCore.runClasses(Nested1.class);
Assert.assertEquals(1, r.getFailureCount());
Assert.assertTrue(Nested1.file.exists());
Nested1.file.delete();
}
}

View File

@ -44,6 +44,10 @@ public abstract class AbstractDistinctValuesCollector<GC extends AbstractDistinc
public void setScorer(Scorer scorer) throws IOException {
}
/**
* Returned by {@link AbstractDistinctValuesCollector#getGroups()},
* representing the value and set of distinct values for the group.
*/
public abstract static class GroupCount<GROUP_VALUE_TYPE> {
public final GROUP_VALUE_TYPE groupValue;

View File

@ -17,7 +17,12 @@
package org.apache.lucene.search.grouping;
/** @lucene.internal */
import org.apache.lucene.search.FieldComparator; // javadocs
/**
* Expert: representation of a group in {@link AbstractFirstPassGroupingCollector},
* tracking the top doc and {@link FieldComparator} slot.
* @lucene.internal */
public class CollectedSearchGroup<T> extends SearchGroup<T> {
int topDoc;
int comparatorSlot;

View File

@ -90,6 +90,28 @@
</sequential>
</macrodef>
<property name="test-framework.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}.jar"/>
<target name="check-test-framework-uptodate" unless="test-framework.uptodate">
<module-uptodate name="test-framework" jarfile="${test-framework.jar}" property="test-framework.uptodate"/>
</target>
<target name="jar-test-framework" unless="test-framework.uptodate" depends="check-test-framework-uptodate">
<ant dir="${common.dir}/test-framework" target="jar-core" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="test-framework.uptodate" value="true"/>
</target>
<property name="test-framework-javadoc.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}-javadoc.jar"/>
<target name="check-test-framework-javadocs-uptodate" unless="test-framework-javadocs.uptodate">
<module-uptodate name="test-framework" jarfile="${test-framework-javadoc.jar}" property="test-framework-javadocs.uptodate"/>
</target>
<target name="javadocs-test-framework" unless="test-framework-javadocs.uptodate" depends="check-test-framework-javadocs-uptodate">
<ant dir="${common.dir}/test-framework" target="javadocs" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="test-framework-javadocs.uptodate" value="true"/>
</target>
<property name="queryparser.jar" value="${common.dir}/build/queryparser/lucene-queryparser-${version}.jar"/>
<target name="check-queryparser-uptodate" unless="queryparser.uptodate">
<module-uptodate name="queryparser" jarfile="${queryparser.jar}" property="queryparser.uptodate"/>

View File

@ -24,6 +24,9 @@ import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns a constant double value for every document.
*/
public class DoubleConstValueSource extends ConstNumberSource {
final double constant;
private final float fv;

View File

@ -28,7 +28,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Map;
/** @lucene.internal */
/**
* Function that returns {@link TFIDFSimilarity#idf(long, long)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class IDFValueSource extends DocFreqValueSource {
public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes);

View File

@ -30,6 +30,10 @@ import java.util.List;
import java.util.Map;
/**
* Depending on the boolean value of the <code>ifSource</code> function,
* returns the value of the <code>trueSource</code> or <code>falseSource</code> function.
*/
public class IfFunction extends BoolFunction {
private final ValueSource ifSource;
private final ValueSource trueSource;

View File

@ -17,6 +17,7 @@
package org.apache.lucene.queries.function.valuesource;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.IndexSearcher;
@ -24,6 +25,11 @@ import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
import java.util.Map;
/**
* Returns the value of {@link IndexReader#maxDoc()}
* for every document. This is the number of documents
* including deletions.
*/
public class MaxDocValueSource extends ValueSource {
public String name() {
return "maxdoc";

View File

@ -28,6 +28,13 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#decodeNormValue(byte)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class NormValueSource extends ValueSource {
protected final String field;
public NormValueSource(String field) {

View File

@ -30,7 +30,10 @@ import java.io.IOException;
import java.util.Map;
/**
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments).
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens
* (sum of term freqs across all documents, across all terms).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal
*/
public class SumTotalTermFreqValueSource extends ValueSource {

View File

@ -28,6 +28,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#tf(int)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class TFValueSource extends TermFreqValueSource {
public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes);

View File

@ -26,6 +26,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link DocsEnum#freq()} for the
* supplied term in every document.
* <p>
* If the term does not exist in the document, returns 0.
* If frequencies are omitted, returns 1.
*/
public class TermFreqValueSource extends DocFreqValueSource {
public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes);
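The added javadoc pins down the edge cases: 0 when the term is missing from a document, 1 when the field omitted frequencies. A hedged sketch of evaluating the function against a single segment, using the constructor shown above and the 4.0-era ValueSource.getValues(Map, AtomicReaderContext) plumbing; the field and term names are illustrative:

import java.io.IOException;
import java.util.HashMap;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.TermFreqValueSource;
import org.apache.lucene.util.BytesRef;

class TermFreqSketch {
  // Per-document frequency of the term "lucene" in field "body".
  static int termFreq(AtomicReaderContext leaf, int docId) throws IOException {
    ValueSource vs = new TermFreqValueSource("body", "lucene", "body", new BytesRef("lucene"));
    FunctionValues values = vs.getValues(new HashMap<Object, Object>(), leaf);
    return values.intVal(docId); // 0 if the term is absent; 1 if freqs were omitted
  }
}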

View File

@ -28,7 +28,10 @@ import java.io.IOException;
import java.util.Map;
/**
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments).
* <code>TotalTermFreqValueSource</code> returns the total term freq
* (sum of term freqs across all documents).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal
*/
public class TotalTermFreqValueSource extends ValueSource {

View File

@ -29,7 +29,9 @@ import org.apache.lucene.util._TestUtil;
// a MockRemovesTokensTF, ideally subclassing FilteringTF
// (in modules/analysis)
// Randomly injects holes:
/**
* Randomly injects holes (similar to what a stopfilter would do)
*/
public final class MockHoleInjectingTokenFilter extends TokenFilter {
private final long randomSeed;

View File

@ -27,8 +27,9 @@ import java.io.Reader;
/**
*
*
* Wraps a whitespace tokenizer with a filter that sets
* the first token and odd tokens to posinc=1, and all others
* to 0, encoding the position as "pos: XXX" in the payload.
**/
public final class MockPayloadAnalyzer extends Analyzer {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermsIndexReaderBase;
import org.apache.lucene.codecs.TermsIndexWriterBase;
import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs
import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
@ -39,6 +40,10 @@ import org.apache.lucene.util.BytesRef;
// TODO: we could make a separate base class that can wrap
// any PostingsBaseFormat and make it ord-able...
/**
* Customized version of {@link Lucene40Codec} that uses
* {@link FixedGapTermsIndexWriter}.
*/
public class Lucene40WithOrds extends PostingsFormat {
public Lucene40WithOrds() {

View File

@ -72,6 +72,9 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat {
return new MockIntFactory(blockSize);
}
/**
* Encodes blocks as vInts of a fixed block size.
*/
public static class MockIntFactory extends IntStreamFactory {
private final int blockSize;

View File

@ -70,6 +70,10 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat {
return getName() + "(baseBlockSize="+ baseBlockSize + ")";
}
/**
* If the first value is <= 3, writes baseBlockSize vInts at once,
* otherwise writes 2*baseBlockSize vInts.
*/
public static class MockIntFactory extends IntStreamFactory {
private final int baseBlockSize;

View File

@ -25,7 +25,10 @@ import org.apache.lucene.codecs.sep.IntStreamFactory;
import java.io.IOException;
/** @lucene.experimental */
/**
* Encodes ints directly as vInts with {@link MockSingleIntIndexOutput}
* @lucene.experimental
*/
public class MockSingleIntFactory extends IntStreamFactory {
@Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {

View File

@ -28,7 +28,7 @@ import org.apache.lucene.store.IndexInput;
/** Reads IndexInputs written with {@link
* MockSingleIntIndexOutput}. NOTE: this class is just for
* demonstration puprposes (it is a very slow way to read a
* demonstration purposes (it is a very slow way to read a
* block of ints).
*
* @lucene.experimental
@ -54,6 +54,9 @@ public class MockSingleIntIndexInput extends IntIndexInput {
in.close();
}
/**
* Just reads a vInt directly from the file.
*/
public static class Reader extends IntIndexInput.Reader {
// clone:
private final IndexInput in;

View File

@ -68,7 +68,7 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
return info.sizeInBytes();
}
public static enum Drink {
private static enum Drink {
Beer(15), Wine(17), Champagne(21), WhiteRussian(22), SingleMalt(30);
@ -77,11 +77,6 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
Drink(long drunkFactor) {
this.drunkFactor = drunkFactor;
}
public long drunk() {
return drunkFactor;
}
}
}

View File

@ -23,6 +23,10 @@ import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;
/**
* A {@link FilterAtomicReader} that exposes only a subset
* of fields from the underlying wrapped reader.
*/
public final class FieldFilterAtomicReader extends FilterAtomicReader {
private final Set<String> fields;

View File

@ -42,9 +42,15 @@ import org.apache.lucene.util._TestUtil;
// - doc blocks? so we can test joins/grouping...
// - controlled consistency (NRTMgr)
/**
* Base test class for simulating distributed search across multiple shards.
*/
public abstract class ShardSearchingTestBase extends LuceneTestCase {
// TODO: maybe SLM should throw this instead of returning null...
/**
* Thrown when the lease for a searcher has expired.
*/
public static class SearcherExpiredException extends RuntimeException {
public SearcherExpiredException(String message) {
super(message);
@ -604,6 +610,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
}
}
/**
* An IndexSearcher and associated version (lease)
*/
protected static class SearcherAndVersion {
public final IndexSearcher searcher;
public final long version;

View File

@ -146,6 +146,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
preventDoubleWrite = value;
}
/**
* Enum for controlling hard disk throttling.
* Set via {@link MockDirectoryWrapper#setThrottling(Throttling)}
* <p>
* WARNING: can make tests very slow.
*/
public static enum Throttling {
/** always emulate a slow hard disk. could be very slow! */
ALWAYS,

View File

@ -24,13 +24,17 @@ import java.io.*;
*/
final class CloseableFile implements Closeable {
private final File file;
private final TestRuleMarkFailure failureMarker;
public CloseableFile(File file) {
public CloseableFile(File file, TestRuleMarkFailure failureMarker) {
this.file = file;
this.failureMarker = failureMarker;
}
@Override
public void close() throws IOException {
// only if there were no other test failures.
if (failureMarker.wasSuccessful()) {
if (file.exists()) {
try {
_TestUtil.rmDir(file);
@ -45,4 +49,5 @@ final class CloseableFile implements Closeable {
}
}
}
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
/**
* Converts numbers to english strings for testing.
* @lucene.internal
*/
public final class English {

View File

@ -26,6 +26,9 @@ package org.apache.lucene.util;
* @lucene.internal */
public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {
/**
* Implement to reset an instance
*/
public static interface Resettable {
public void reset();
}

View File

@ -1,10 +1,5 @@
package org.apache.lucene.util;
import org.apache.lucene.search.FieldCache;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -22,6 +17,30 @@ import org.junit.runners.model.Statement;
* limitations under the License.
*/
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.FieldCacheSanityChecker; // javadocs
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* This rule will fail the test if it has insane field caches.
* <p>
* calling assertSaneFieldCaches here isn't as useful as having test
* classes call it directly from the scope where the index readers
* are used, because they could be gc'ed just before this tearDown
* method is called.
* <p>
* But it's better than nothing.
* <p>
* If you are testing functionality that you know for a fact
* "violates" FieldCache sanity, then you should either explicitly
* call purgeFieldCache at the end of your test method, or refactor
* your Test class so that the inconsistent FieldCache usages are
* isolated in distinct test methods
*
* @see FieldCacheSanityChecker
*/
public class TestRuleFieldCacheSanity implements TestRule {
@Override
@ -33,18 +52,6 @@ public class TestRuleFieldCacheSanity implements TestRule {
Throwable problem = null;
try {
// calling assertSaneFieldCaches here isn't as useful as having test
// classes call it directly from the scope where the index readers
// are used, because they could be gc'ed just before this tearDown
// method is called.
//
// But it's better then nothing.
//
// If you are testing functionality that you know for a fact
// "violates" FieldCache sanity, then you should either explicitly
// call purgeFieldCache at the end of your test method, or refactor
// your Test class so that the inconsistent FieldCache usages are
// isolated in distinct test methods
LuceneTestCase.assertSaneFieldCaches(d.getDisplayName());
} catch (Throwable t) {
problem = t;

View File

@ -21,6 +21,10 @@ import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* Stores the suite name so you can retrieve it
* from {@link #getTestClass()}
*/
public class TestRuleStoreClassName implements TestRule {
private volatile Description description;

View File

@ -94,7 +94,7 @@ public class _TestUtil {
try {
File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR);
f.delete();
LuceneTestCase.closeAfterSuite(new CloseableFile(f));
LuceneTestCase.closeAfterSuite(new CloseableFile(f, LuceneTestCase.suiteFailureMarker));
return f;
} catch (IOException e) {
throw new RuntimeException(e);
@ -136,7 +136,7 @@ public class _TestUtil {
rmDir(destDir);
destDir.mkdir();
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir));
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker));
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();

View File

@ -127,6 +127,13 @@ public class AutomatonTestUtil {
return code;
}
/**
* Lets you retrieve random strings accepted
* by an Automaton.
* <p>
* Once created, call {@link #getRandomAcceptedString(Random)}
* to get a new string (in UTF-32 codepoints).
*/
public static class RandomAcceptedStrings {
private final Map<Transition,Boolean> leadsToAccept;

View File

@ -26,6 +26,14 @@ $Id$
================== 4.0.0 ==================
Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.5.0
Velocity 1.6.4 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.3.6
Upgrading from Solr 4.0.0-BETA
----------------------
@ -63,6 +71,27 @@ Bug Fixes
* SOLR-3649: Fixed bug in JavabinLoader that caused deleteById(List<String> ids)
to not work in SolrJ (siren)
* SOLR-3730: Rollback is not implemented quite right and can cause corner case fails in
SolrCloud tests. (rmuir, Mark Miller)
* SOLR-2981: Fixed StatsComponent to no longer return duplicated information
when requesting multiple stats.facet fields.
(Roman Kliewer via hossman)
Other Changes
----------------------
* SOLR-3690: Fixed binary release packages to include dependencies needed for
the solr-test-framework (hossman)
* SOLR-2857: The /update/json and /update/csv URLs were restored to aid
in the migration of existing clients. (yonik)
* SOLR-3691: SimplePostTool: Mode for crawling/posting web pages
See http://wiki.apache.org/solr/ExtractingRequestHandler for examples (janhoy)
* SOLR-3707: Upgrade Solr to Tika 1.2 (janhoy)
================== 4.0.0-BETA ===================
@ -271,7 +300,6 @@ Other Changes
Also, the configuration itself can be passed using the "dataConfig" parameter rather than
using a file (this previously worked in debug mode only). When configuration errors are
encountered, the error message is returned in XML format. (James Dyer)
* SOLR-3439: Make SolrCell easier to use out of the box. Also improves "/browse" to display
rich-text documents correctly, along with facets for author and content_type.
With the new "content" field, highlighting of body is supported. See also SOLR-3672 for

View File

@ -310,12 +310,11 @@ Copyright 2004 Sun Microsystems, Inc. (Rome JAR)
Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)
Copyright (C) 1999-2007 Shigeru Chiba. All Rights Reserved.
(Javassist, MPL licensed: http://www.csg.ci.i.u-tokyo.ac.jp/~chiba/javassist/)
Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis)
Scannotation (C) Bill Burke
Copyright 2012 Kohei Taketa juniversalchardet (http://code.google.com/p/juniversalchardet/)
Lasse Collin and others, XZ for Java (http://tukaani.org/xz/java.html)
=========================================================================
== Language Detection Notices ==

View File

@ -386,8 +386,9 @@
<tarfileset dir="."
prefix="${fullnamever}"
includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/**
client/README.txt client/ruby/solr-ruby/** contrib/**/lib/**
contrib/**/README.txt licenses/**"
client/README.txt client/ruby/solr-ruby/**
contrib/**/lib/** contrib/**/README.txt
licenses/**"
excludes="lib/README.committers.txt **/data/ **/logs/*
**/classes/ **/*.sh **/ivy.xml **/build.xml
**/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml
@ -401,7 +402,9 @@
includes="example/**/*.sh example/**/bin/" />
<tarfileset dir="."
prefix="${fullnamever}"
includes="dist/*.jar dist/*.war dist/solrj-lib/*"
includes="dist/*.jar dist/*.war
dist/solrj-lib/*
dist/test-framework/**"
excludes="**/*.tgz **/*.zip **/*.md5 **/*src*.jar **/*docs*.jar **/*.sha1" />
<tarfileset dir="${dest}/docs"
prefix="${fullnamever}/docs" />

View File

@ -193,7 +193,7 @@
<property name="lucenedocs" location="${common.dir}/build/docs"/>
<!-- dependency to ensure all lucene javadocs are present -->
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial"/>
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial,javadocs-test-framework"/>
<!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs">

View File

@ -20,36 +20,36 @@
<info organisation="org.apache.solr" module="extraction"/>
<dependencies>
<!-- Tika JARs -->
<dependency org="org.apache.tika" name="tika-core" rev="1.1" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.1" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.1/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<dependency org="org.apache.tika" name="tika-core" rev="1.2" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.2" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.2/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<!-- When upgrading Tika, upgrade dependencies versions and add any new ones
(except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) -->
<dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.1" transitive="false"/>
<dependency org="org.gagravarr" name="vorbis-java-core" rev="0.1" transitive="false"/>
<dependency org="edu.ucar" name="netcdf" rev="4.2-min" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.3" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.4.1" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.7.0" transitive="false"/>
<dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8" transitive="false"/>
<dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.3.0" transitive="false"/>
<dependency org="dom4j" name="dom4j" rev="1.6.1" transitive="false"/>
<dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-beta-5" transitive="false"/>
<dependency org="net.sf.scannotation" name="scannotation" rev="1.0.2" transitive="false"/>
<dependency org="javassist" name="javassist" rev="3.6.0.GA" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-RC-1" transitive="false"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.4.0-beta-1" transitive="false"/>
<dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" transitive="false"/>
<dependency org="rome" name="rome" rev="0.9" transitive="false"/>
<dependency org="jdom" name="jdom" rev="1.0" transitive="false"/>
<dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" transitive="false"/>
<dependency org="org.tukaani" name="xz" rev="1.0" transitive="false"/>
<!-- Other ExtractingRequestHandler dependencies -->
<dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
<dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/>


@@ -64,8 +64,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent",
"literal.id", "one",
@@ -81,6 +80,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor",
"fmap.language", "extractedLanguage",
"literal.id", "two",
"uprefix", "ignored_",
"fmap.content", "extractedContent",
"fmap.Last-Modified", "extractedDate"
);
@@ -136,6 +136,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Author", "extractedAuthor",
"literal.id", "three",
"uprefix", "ignored_",
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Last-Modified", "extractedDate"
@@ -206,6 +207,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent",
"literal.id", "one",
"uprefix", "ignored_",
"fmap.language", "extractedLanguage",
"literal.extractionLiteralMV", "one",
"literal.extractionLiteralMV", "two",
@@ -374,9 +376,8 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Author", "extractedAuthor",
"uprefix", "ignored_",
"fmap.content", "wdf_nocase",
"literal.id", "one",
"fmap.Last-Modified", "extractedDate");
@@ -404,8 +405,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/password-is-solrcell.docx", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Author", "extractedAuthor",
"fmap.content", "wdf_nocase",
"literal.id", "one",
@@ -462,8 +462,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");
// Here the literal value should override the Tika-parsed title:
@@ -478,8 +477,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");
// Here we mimic the old behaviour where literals are added, not overridden
@@ -498,8 +496,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");
assertU(commit());
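
All of the test hunks above make the same substitution: instead of hand-mapping each unknown Tika field (fmap.AAPL:Keywords, fmap.xmpTPg:NPages) to ignored_a, the request sets uprefix=ignored_, which prefixes any extracted field that is not in the schema so it falls into the ignored_* dynamic field. A hedged SolrJ sketch of the same request shape (the server URL and file name are invented; verify addFile's signature against your SolrJ version):

import java.io.File;

import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class ExtractWithUprefix {
  public static void main(String[] args) throws Exception {
    HttpSolrServer server = new HttpSolrServer("http://localhost:8983/solr"); // assumed URL
    ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
    req.addFile(new File("some-document.pdf"), "application/pdf"); // hypothetical file
    req.setParam("literal.id", "one");
    req.setParam("fmap.content", "extractedContent");
    // Any Tika field with no schema match is renamed, e.g.
    // AAPL:Keywords -> ignored_AAPL:Keywords, and caught by the ignored_* dynamic field.
    req.setParam("uprefix", "ignored_");
    server.request(req);
    server.commit();
  }
}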


@@ -125,6 +125,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
cacheValue.refCnt--;
if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) {
log.info("Closing directory:" + cacheValue.path);
directory.close();
byDirectoryCache.remove(directory);
byPathCache.remove(cacheValue.path);
@@ -194,6 +195,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
byDirectoryCache.put(directory, newCacheValue);
byPathCache.put(fullPath, newCacheValue);
log.info("return new directory for " + fullPath + " forceNew:" + forceNew);
} else {
cacheValue.refCnt++;
}
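
Both hunks deal with the factory's reference counting: get() hands the same Directory back to later callers (refCnt++), and release() closes it only once the count reaches zero and doneWithDirectory() has been called. A self-contained sketch of that lifecycle, using illustrative names rather than Solr's actual API:

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Illustrative ref-counted cache in the spirit of CachingDirectoryFactory;
// not Solr code, just the pattern the hunks above rely on.
public class RefCountedCache<T extends Closeable> {
  private static class Entry<V> {
    V value;
    int refCnt = 1;   // the caller that created it holds one reference
    boolean doneWith; // owner has said the value may go away
  }

  private final Map<String, Entry<T>> byPath = new HashMap<String, Entry<T>>();

  public synchronized T get(String path, T freshValue) {
    Entry<T> e = byPath.get(path);
    if (e == null) {
      e = new Entry<T>();
      e.value = freshValue; // first caller supplies the instance
      byPath.put(path, e);
    } else {
      e.refCnt++;           // later callers share it
    }
    return e.value;
  }

  public synchronized void doneWith(String path) {
    byPath.get(path).doneWith = true;
  }

  public synchronized void release(String path) throws IOException {
    Entry<T> e = byPath.get(path);
    e.refCnt--;
    // Mirrors the refCnt == 0 && doneWithDir check above: close only when
    // nobody holds a reference and the owner has marked it done.
    if (e.refCnt == 0 && e.doneWith) {
      e.value.close();
      byPath.remove(path);
    }
  }
}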


@@ -1554,7 +1554,7 @@ public final class SolrCore implements SolrInfoMBean {
} catch (Throwable e) {
// do not allow decref() operations to fail since they are typically called in finally blocks
// and throwing another exception would be very unexpected.
SolrException.log(log, "Error closing searcher:", e);
SolrException.log(log, "Error closing searcher:" + this, e);
}
}
};


@@ -29,7 +29,7 @@ public class CSVRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) {
super.init(args);
setAssumeContentType("application/csv");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
// log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
}
//////////////////////// SolrInfoMBeans methods //////////////////////


@@ -29,7 +29,7 @@ public class JsonUpdateRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) {
super.init(args);
setAssumeContentType("application/json");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
// log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
}
//////////////////////// SolrInfoMBeans methods //////////////////////


@@ -384,7 +384,7 @@ public class SnapPuller {
// may be closed
core.getDirectoryFactory().doneWithDirectory(oldDirectory);
}
doCommit();
doCommit(isFullCopyNeeded);
}
replicationStartTime = 0;
@@ -533,11 +533,11 @@ public class SnapPuller {
return sb;
}
private void doCommit() throws IOException {
private void doCommit(boolean isFullCopyNeeded) throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams());
// reboot the writer on the new index and get a new searcher
solrCore.getUpdateHandler().newIndexWriter(true);
solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded);
try {
// first try to open an NRT searcher so that the new


@@ -182,8 +182,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
nl2.add(e2.getKey(), e2.getValue().getStatsValues());
}
res.add(FACETS, nl);
}
res.add(FACETS, nl);
return res;
}


@@ -74,8 +74,7 @@ public final class DefaultSolrCoreState extends SolrCoreState {
}
if (indexWriter == null) {
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, false);
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", false);
}
if (refCntWriter == null) {
refCntWriter = new RefCounted<IndexWriter>(indexWriter) {
@@ -113,15 +112,25 @@
try {
if (indexWriter != null) {
if (!rollback) {
try {
log.info("Closing old IndexWriter... core=" + coreName);
indexWriter.close();
} catch (Throwable t) {
SolrException.log(log, "Error closing old IndexWriter. core=" + coreName, t);
SolrException.log(log, "Error closing old IndexWriter. core="
+ coreName, t);
}
} else {
try {
log.info("Rollback old IndexWriter... core=" + coreName);
indexWriter.rollback();
} catch (Throwable t) {
SolrException.log(log, "Error rolling back old IndexWriter. core="
+ coreName, t);
}
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, true);
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", true);
log.info("New IndexWriter is ready to be used.");
// we need to null this so it picks up the new writer next get call
refCntWriter = null;
@@ -174,14 +183,12 @@
@Override
public synchronized void rollbackIndexWriter(SolrCore core) throws IOException {
indexWriter.rollback();
newIndexWriter(core, true);
}
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name,
boolean removeAllExisting, boolean forceNewDirectory) throws IOException {
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, boolean forceNewDirectory) throws IOException {
return new SolrIndexWriter(name, core.getNewIndexDir(),
core.getDirectoryFactory(), removeAllExisting, core.getSchema(),
core.getDirectoryFactory(), false, core.getSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), forceNewDirectory);
}


@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import org.apache.solr.common.util.FastOutputStream;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/** @lucene.internal */
public class MemOutputStream extends FastOutputStream {
public List<byte[]> buffers = new LinkedList<byte[]>();
public MemOutputStream(byte[] tempBuffer) {
super(null, tempBuffer, 0);
}
@Override
public void flush(byte[] arr, int offset, int len) throws IOException {
if (arr == buf && offset==0 && len==buf.length) {
buffers.add(buf); // steal the buffer
buf = new byte[8192];
} else if (len > 0) {
byte[] newBuf = new byte[len];
System.arraycopy(arr, offset, newBuf, 0, len);
buffers.add(newBuf);
}
}
public void writeAll(FastOutputStream fos) throws IOException {
for (byte[] buffer : buffers) {
fos.write(buffer);
}
if (pos > 0) {
fos.write(buf, 0, pos);
}
}
}
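
This is the same MemOutputStream that a hunk further down removes from the transaction log code; it now lives in its own file. It grows by stealing full 8 KB buffers into the buffers list rather than reallocating one ever-larger array, and writeAll() replays the chunks plus the partially filled tail. A small usage sketch (it assumes FastOutputStream's single-argument OutputStream constructor):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.solr.common.util.FastOutputStream;
import org.apache.solr.update.MemOutputStream;

public class MemOutputStreamDemo {
  public static void main(String[] args) throws IOException {
    MemOutputStream mem = new MemOutputStream(new byte[8192]);
    for (int i = 0; i < 100000; i++) {
      mem.write((byte) i); // full buffers are "stolen" into mem.buffers
    }
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    FastOutputStream out = new FastOutputStream(sink); // assumed constructor
    mem.writeAll(out); // replay every chunk, then the buffered tail
    out.flush();
    System.out.println(sink.size()); // 100000
  }
}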


@@ -141,6 +141,8 @@ public class SolrIndexWriter extends IndexWriter {
super.rollback();
} finally {
isClosed = true;
directoryFactory.release(getDirectory());
numCloses.incrementAndGet();
}
}


@@ -775,31 +775,3 @@ class ChannelFastInputStream extends FastInputStream {
}
class MemOutputStream extends FastOutputStream {
public List<byte[]> buffers = new LinkedList<byte[]>();
public MemOutputStream(byte[] tempBuffer) {
super(null, tempBuffer, 0);
}
@Override
public void flush(byte[] arr, int offset, int len) throws IOException {
if (arr == buf && offset==0 && len==buf.length) {
buffers.add(buf); // steal the buffer
buf = new byte[8192];
} else if (len > 0) {
byte[] newBuf = new byte[len];
System.arraycopy(arr, offset, newBuf, 0, len);
buffers.add(newBuf);
}
}
public void writeAll(FastOutputStream fos) throws IOException {
for (byte[] buffer : buffers) {
fos.write(buffer);
}
if (pos > 0) {
fos.write(buf, 0, pos);
}
}
}


@@ -28,7 +28,7 @@ public class FastWriter extends Writer {
// it won't cause double buffering.
private static final int BUFSIZE = 8192;
protected final Writer sink;
protected final char[] buf;
protected char[] buf;
protected int pos;
public FastWriter(Writer w) {
@@ -69,42 +69,64 @@
}
@Override
public void write(char cbuf[], int off, int len) throws IOException {
public void write(char arr[], int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos;
if (len < space) {
System.arraycopy(cbuf, off, buf, pos, len);
if (len <= space) {
System.arraycopy(arr, off, buf, pos, len);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
System.arraycopy(cbuf, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = len-space;
System.arraycopy(cbuf, off+space, buf, 0, pos);
} else {
return;
} else if (len > buf.length) {
if (pos>0) {
flush(buf,0,pos); // flush
pos=0;
}
// don't buffer, just write to sink
flush(cbuf, off, len);
flush(arr, off, len);
return;
}
// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.
System.arraycopy(arr, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
}
}
@Override
public void write(String str, int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos;
if (len < space) {
if (len <= space) {
str.getChars(off, off+len, buf, pos);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
str.getChars(off+space, off+len, buf, 0);
pos = len-space;
} else {
return;
} else if (len > buf.length) {
if (pos>0) {
flush(buf,0,pos); // flush
pos=0;
}
// don't buffer, just write to sink
flush(str, off, len);
return;
}
// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.
str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
}
}
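
The rewritten loops fix the old version's handling of mid-sized writes: a chunk that fits in the free space is buffered and the method returns; a chunk larger than the whole buffer flushes whatever is pending and goes straight to the sink; anything in between fills the free space, flushes, and iterates instead of assuming a single flush is enough. A usage sketch sized to hit all three paths against the 8192-char buffer (FastWriter's package is assumed to be org.apache.solr.common.util):

import java.io.IOException;
import java.io.StringWriter;
import java.util.Arrays;

import org.apache.solr.common.util.FastWriter;

public class FastWriterDemo {
  public static void main(String[] args) throws IOException {
    StringWriter sink = new StringWriter();
    FastWriter fw = new FastWriter(sink);

    char[] small = new char[100];   // fits in free space: buffered, no I/O yet
    char[] mid   = new char[8192];  // fits in the buffer but not the free space:
                                    // fill, flush, iterate
    char[] large = new char[20000]; // larger than the buffer: written through

    Arrays.fill(small, 'a');
    Arrays.fill(mid, 'b');
    Arrays.fill(large, 'c');

    fw.write(small, 0, small.length);
    fw.write(mid, 0, mid.length);
    fw.write(large, 0, large.length);
    fw.flush(); // push any buffered tail to the sink

    System.out.println(sink.toString().length()); // 28292
  }
}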

File diff suppressed because it is too large.


@@ -0,0 +1,49 @@
<html>
<head>
<title>Welcome to Solr</title>
</head>
<body>
<p>
Here is some text
</p>
<div>Here is some text in a div</div>
<div>This has a <a href="http://www.apache.org">link</a>.</div>
<a href="#news">News</a>
<ul class="minitoc">
<li>
<a href="#03+October+2008+-+Solr+Logo+Contest">03 October 2008 - Solr Logo Contest</a>
</li>
<li>
<a href="#15+September+2008+-+Solr+1.3.0+Available">15 September 2008 - Solr 1.3.0 Available</a>
</li>
<li>
<a href="#28+August+2008+-+Lucene%2FSolr+at+ApacheCon+New+Orleans">28 August 2008 - Lucene/Solr at ApacheCon New Orleans</a>
</li>
<li>
<a href="#03+September+2007+-+Lucene+at+ApacheCon+Atlanta">03 September 2007 - Lucene at ApacheCon Atlanta</a>
</li>
<li>
<a href="#06+June+2007%3A+Release+1.2+available">06 June 2007: Release 1.2 available</a>
</li>
<li>
<a href="#17+January+2007%3A+Solr+graduates+from+Incubator">17 January 2007: Solr graduates from Incubator</a>
</li>
<li>
<a href="#22+December+2006%3A+Release+1.1.0+available">22 December 2006: Release 1.1.0 available</a>
</li>
<li>
<a href="#15+August+2006%3A+Solr+at+ApacheCon+US">15 August 2006: Solr at ApacheCon US</a>
</li>
<li>
<a href="#21+April+2006%3A+Solr+at+ApacheCon">21 April 2006: Solr at ApacheCon</a>
</li>
<li>
<a href="#21+February+2006%3A+nightly+builds">21 February 2006: nightly builds</a>
</li>
<li>
<a href="#17+January+2006%3A+Solr+Joins+Apache+Incubator">17 January 2006: Solr Joins Apache Incubator</a>
</li>
</ul>
</body>
</html>


@@ -0,0 +1,3 @@
Example text document
This is a simple example for a plain text document, indexed to Solr


@@ -54,7 +54,7 @@
-->
<maxBufferedDocs>10</maxBufferedDocs>
<mergePolicy class="org.apache.lucene.index.LogDocMergePolicy"/>
<lockType>single</lockType>
<lockType>native</lockType>
<unlockOnStartup>true</unlockOnStartup>
</indexConfig>
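
The test config moves from the JVM-local "single" lock to the "native" lock, which uses OS-level file locking and therefore also guards against a second process opening a writer on the same index. Roughly, Solr's lockType values map onto Lucene lock factories as below (a sketch against the Lucene 4.x file-based API; the index path is illustrative):

import java.io.File;
import java.io.IOException;

import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NativeFSLockFactory;

public class LockTypeDemo {
  public static void main(String[] args) throws IOException {
    // "single" -> SingleInstanceLockFactory (in-process only)
    // "simple" -> SimpleFSLockFactory      (lock file on disk)
    // "native" -> NativeFSLockFactory      (java.nio OS-level lock)
    FSDirectory dir = FSDirectory.open(new File("build/test-index"),
        new NativeFSLockFactory());
    System.out.println("lock factory: " + dir.getLockFactory());
    dir.close();
  }
}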


@@ -228,32 +228,35 @@ public class StatsComponentTest extends AbstractSolrTestCase {
}
public void doTestFacetStatisticsResult(String f) throws Exception {
assertU(adoc("id", "1", f, "10", "active_s", "true"));
assertU(adoc("id", "2", f, "20", "active_s", "true"));
assertU(adoc("id", "3", f, "30", "active_s", "false"));
assertU(adoc("id", "4", f, "40", "active_s", "false"));
assertU(adoc("id", "1", f, "10", "active_s", "true", "other_s", "foo"));
assertU(adoc("id", "2", f, "20", "active_s", "true", "other_s", "bar"));
assertU(adoc("id", "3", f, "30", "active_s", "false", "other_s", "foo"));
assertU(adoc("id", "4", f, "40", "active_s", "false", "other_s", "foo"));
assertU(commit());
assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","indent","true")
, "//lst[@name='true']/double[@name='min'][.='10.0']"
, "//lst[@name='true']/double[@name='max'][.='20.0']"
, "//lst[@name='true']/double[@name='sum'][.='30.0']"
, "//lst[@name='true']/long[@name='count'][.='2']"
, "//lst[@name='true']/long[@name='missing'][.='0']"
, "//lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, "//lst[@name='true']/double[@name='mean'][.='15.0']"
, "//lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
final String pre = "//lst[@name='stats_fields']/lst[@name='"+f+"']/lst[@name='facets']/lst[@name='active_s']";
assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","stats.facet","other_s","indent","true")
, "*[count("+pre+")=1]"
, pre+"/lst[@name='true']/double[@name='min'][.='10.0']"
, pre+"/lst[@name='true']/double[@name='max'][.='20.0']"
, pre+"/lst[@name='true']/double[@name='sum'][.='30.0']"
, pre+"/lst[@name='true']/long[@name='count'][.='2']"
, pre+"/lst[@name='true']/long[@name='missing'][.='0']"
, pre+"/lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, pre+"/lst[@name='true']/double[@name='mean'][.='15.0']"
, pre+"/lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
);
assertQ("test value for active_s=false", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s")
, "//lst[@name='false']/double[@name='min'][.='30.0']"
, "//lst[@name='false']/double[@name='max'][.='40.0']"
, "//lst[@name='false']/double[@name='sum'][.='70.0']"
, "//lst[@name='false']/long[@name='count'][.='2']"
, "//lst[@name='false']/long[@name='missing'][.='0']"
, "//lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, "//lst[@name='false']/double[@name='mean'][.='35.0']"
, "//lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
, pre+"/lst[@name='false']/double[@name='min'][.='30.0']"
, pre+"/lst[@name='false']/double[@name='max'][.='40.0']"
, pre+"/lst[@name='false']/double[@name='sum'][.='70.0']"
, pre+"/lst[@name='false']/long[@name='count'][.='2']"
, pre+"/lst[@name='false']/long[@name='missing'][.='0']"
, pre+"/lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, pre+"/lst[@name='false']/double[@name='mean'][.='35.0']"
, pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
);
}


@@ -0,0 +1,237 @@
package org.apache.solr.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.util.SimplePostTool.PageFetcher;
import org.apache.solr.util.SimplePostTool.PageFetcherResult;
import org.junit.Before;
import org.junit.Test;
public class SimplePostToolTest extends SolrTestCaseJ4 {
SimplePostTool t_file, t_file_auto, t_file_rec, t_web, t_test;
PageFetcher pf;
@Before
public void setUp() throws Exception {
super.setUp();
String[] args = {"-"};
System.setProperty("data", "files");
t_file = SimplePostTool.parseArgsAndInit(args);
System.setProperty("auto", "yes");
t_file_auto = SimplePostTool.parseArgsAndInit(args);
System.setProperty("recursive", "yes");
t_file_rec = SimplePostTool.parseArgsAndInit(args);
System.setProperty("data", "web");
t_web = SimplePostTool.parseArgsAndInit(args);
System.setProperty("params", "param1=foo&param2=bar");
t_test = SimplePostTool.parseArgsAndInit(args);
pf = new MockPageFetcher();
SimplePostTool.pageFetcher = pf;
SimplePostTool.mockMode = true;
}
@Test
public void testParseArgsAndInit() {
assertEquals(false, t_file.auto);
assertEquals(true, t_file_auto.auto);
assertEquals(0, t_file_auto.recursive);
assertEquals(999, t_file_rec.recursive);
assertEquals(true, t_file.commit);
assertEquals(false, t_file.optimize);
assertEquals(null, t_file.out);
assertEquals(1, t_web.recursive);
assertEquals(10, t_web.delay);
assertNotNull(t_test.solrUrl);
}
@Test
public void testNormalizeUrlEnding() {
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/"));
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/#foo?bar=baz"));
assertEquals("http://example.com/index.html", SimplePostTool.normalizeUrlEnding("http://example.com/index.html#hello"));
}
@Test
public void testComputeFullUrl() throws MalformedURLException {
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/"), "/index.html"));
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/foo/bar/"), "/index.html"));
assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo.htm?baz#hello"), "fil.html"));
// TODO: How to know what is the base if URL path ends with "foo"??
// assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo?baz#hello"), "fil.html"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "fil.jpg"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "mailto:hello@foo.bar"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "ftp://server/file"));
}
@Test
public void testTypeSupported() {
assertTrue(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("text/xml"));
assertFalse(t_web.typeSupported("text/foo"));
t_web.fileTypes = "doc,xls,ppt";
t_web.globFileFilter = t_web.getFileFilterFromFileTypes(t_web.fileTypes);
assertFalse(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("application/msword"));
}
@Test
public void testIsOn() {
assertTrue(SimplePostTool.isOn("true"));
assertTrue(SimplePostTool.isOn("1"));
assertFalse(SimplePostTool.isOn("off"));
}
@Test
public void testAppendParam() {
assertEquals("http://example.com?foo=bar", SimplePostTool.appendParam("http://example.com", "foo=bar"));
assertEquals("http://example.com/?a=b&foo=bar", SimplePostTool.appendParam("http://example.com/?a=b", "foo=bar"));
}
@Test
public void testAppendUrlPath() throws MalformedURLException {
assertEquals(new URL("http://example.com/a?foo=bar"), SimplePostTool.appendUrlPath(new URL("http://example.com?foo=bar"), "/a"));
}
@Test
public void testGuessType() {
File f = new File("foo.doc");
assertEquals("application/msword", SimplePostTool.guessType(f));
f = new File("foobar");
assertEquals(null, SimplePostTool.guessType(f));
}
@Test
public void testDoFilesMode() {
t_file_auto.recursive = 0;
File dir = getFile("exampledocs");
int num = t_file_auto.postFiles(new File[] {dir}, 0, null, null);
assertEquals(2, num);
}
@Test
public void testDoWebMode() {
// Uses mock pageFetcher
t_web.delay = 0;
t_web.recursive = 5;
int num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(5, num);
t_web.recursive = 1;
num = t_web.postWebPages(new String[] {"http://example.com/"}, 0, null);
assertEquals(3, num);
// Without respecting robots.txt
SimplePostTool.pageFetcher.robotsCache.clear();
t_web.recursive = 5;
num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(6, num);
}
@Test
public void testRobotsExclusion() throws MalformedURLException {
assertFalse(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/")));
assertTrue(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/disallowed")));
assertTrue("There should be two entries parsed from robots.txt", SimplePostTool.pageFetcher.robotsCache.get("example.com").size() == 2);
}
class MockPageFetcher extends PageFetcher {
HashMap<String,String> htmlMap = new HashMap<String,String>();
HashMap<String,Set<URL>> linkMap = new HashMap<String,Set<URL>>();
public MockPageFetcher() throws IOException {
(new SimplePostTool()).super();
htmlMap.put("http://example.com", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/index.html", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/page1", "<html><body><a href=\"http://example.com/page1/foo\"></body></html>");
htmlMap.put("http://example.com/page1/foo", "<html><body><a href=\"http://example.com/page1/foo/bar\"></body></html>");
htmlMap.put("http://example.com/page1/foo/bar", "<html><body><a href=\"http://example.com/page1\"></body></html>");
htmlMap.put("http://example.com/page2", "<html><body><a href=\"http://example.com/\"><a href=\"http://example.com/disallowed\"/></body></html>");
htmlMap.put("http://example.com/disallowed", "<html><body><a href=\"http://example.com/\"></body></html>");
Set<URL> s = new HashSet<URL>();
s.add(new URL("http://example.com/page1"));
s.add(new URL("http://example.com/page2"));
linkMap.put("http://example.com", s);
linkMap.put("http://example.com/index.html", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo"));
linkMap.put("http://example.com/page1", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo/bar"));
linkMap.put("http://example.com/page1/foo", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/disallowed"));
linkMap.put("http://example.com/page2", s);
// Simulate a robots.txt file with comments and a few disallows
StringBuilder sb = new StringBuilder();
sb.append("# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n");
sb.append("User-agent: * # match all bots\n");
sb.append("Disallow: # This is void\n");
sb.append("Disallow: /disallow # Disallow this path\n");
sb.append("Disallow: /nonexistingpath # Disallow this path\n");
this.robotsCache.put("example.com", SimplePostTool.pageFetcher.
parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes("UTF-8"))));
}
@Override
public PageFetcherResult readPageFromUrl(URL u) {
PageFetcherResult res = (new SimplePostTool()).new PageFetcherResult();
if (isDisallowedByRobots(u)) {
res.httpStatus = 403;
return res;
}
res.httpStatus = 200;
res.contentType = "text/html";
try {
res.content = htmlMap.get(u.toString()).getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException();
}
return res;
}
@Override
public Set<URL> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) {
Set<URL> s = linkMap.get(SimplePostTool.normalizeUrlEnding(u.toString()));
if(s == null)
s = new HashSet<URL>();
return s;
}
}
}

Some files were not shown because too many files have changed in this diff.