LUCENE-3892: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1374578 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-08-18 13:35:11 +00:00
commit bfcd96c689
169 changed files with 3911 additions and 720 deletions

View File

@ -145,21 +145,11 @@
<classpathentry kind="lib" path="solr/contrib/extraction/lib/bcmail-jdk15-1.45.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/bcmail-jdk15-1.45.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/bcprov-jdk15-1.45.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/bcprov-jdk15-1.45.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/boilerpipe-1.1.0.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/boilerpipe-1.1.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/dom4j-1.6.1.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/dom4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/metadata-extractor-2.4.0-beta-1.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/metadata-extractor-2.4.0-beta-1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/netcdf-4.2-min.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/netcdf-4.2-min.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/rome-0.9.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/rome-0.9.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tagsoup-1.2.1.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/tagsoup-1.2.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/> <classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-1.1-20120112.jar"/> <classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-1.1-20120112.jar"/>
<classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.7.jar"/> <classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.7.jar"/>
@ -175,5 +165,25 @@
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/> <classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/> <classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-core-0.7.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-dom-0.7.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.4.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/icu4j-49.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/isoparser-1.0-RC-1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jdom-1.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/juniversalchardet-1.0.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-core-0.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-tika-0.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xercesImpl-2.9.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xz-1.0.jar"/>
<classpathentry kind="output" path="bin/other"/> <classpathentry kind="output" path="bin/other"/>
</classpath> </classpath>

View File

@ -75,6 +75,14 @@ Bug Fixes
encoders / stemmers via the ResourceLoader now instead of Class.forName(). encoders / stemmers via the ResourceLoader now instead of Class.forName().
Solr users should now no longer have to embed these in its war. (David Smiley) Solr users should now no longer have to embed these in its war. (David Smiley)
* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
* LUCENE-4310: MappingCharFilter was failing to match input strings
containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir,
Mike McCandless)
Build Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for

View File

@ -111,9 +111,8 @@ public class NormalizeCharMap {
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRef scratch = new IntsRef(); final IntsRef scratch = new IntsRef();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
builder.add(Util.toUTF32(ent.getKey(), scratch), builder.add(Util.toUTF16(ent.getKey(), scratch),
new CharsRef(ent.getValue())); new CharsRef(ent.getValue()));
} }
map = builder.finish(); map = builder.finish();
pendingPairs.clear(); pendingPairs.clear();

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
public class TestMappingCharFilter extends BaseTokenStreamTestCase { public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@ -55,6 +56,11 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
builder.add( "empty", "" ); builder.add( "empty", "" );
// non-BMP (surrogate pair):
builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
builder.add("\uff01", "full-width-exclamation");
normMap = builder.build(); normMap = builder.build();
} }
@ -128,6 +134,18 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5); assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
} }
public void testNonBMPChar() throws Exception {
CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
}
public void testFullWidthChar() throws Exception {
CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"full-width-exclamation"}, new int[]{0}, new int[]{1}, 1);
}
// //
// 1111111111222 // 1111111111222
// 01234567890123456789012 // 01234567890123456789012

View File

@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
return DefaultsHolder.DEFAULT_STOP_SET; return DefaultsHolder.DEFAULT_STOP_SET;
} }
/**
* Returns an unmodifiable instance of the default stemmer table.
*/
public static Trie getDefaultTable() {
return DefaultsHolder.DEFAULT_TABLE;
}
/** /**
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
* accesses the static final set the first time.; * accesses the static final set the first time.;

View File

@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter; import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer; import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;
/** /**
* Factory for {@link StempelFilter} using a Polish stemming table. * Factory for {@link StempelFilter} using a Polish stemming table.
*/ */
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { public class StempelPolishStemFilterFactory extends TokenFilterFactory {
private Trie stemmer = null;
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(stemmer)); return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}
public void inform(ResourceLoader loader) throws IOException {
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
} }
} }

View File

@ -332,7 +332,7 @@ public class Trie {
* @param key the key * @param key the key
* @param cmd the patch command * @param cmd the patch command
*/ */
public void add(CharSequence key, CharSequence cmd) { void add(CharSequence key, CharSequence cmd) {
if (key == null || cmd == null) { if (key == null || cmd == null) {
return; return;
} }

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/** /**
* Tests for {@link StempelPolishStemFilterFactory} * Tests for {@link StempelPolishStemFilterFactory}
@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
public void testBasics() throws Exception { public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci"); StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(); StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document)); TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts, assertTokenStreamContents(ts,
new String[] { "student", "student" }); new String[] { "student", "student" });

View File

@ -234,10 +234,10 @@
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
<check-missing-javadocs dir="build/docs/benchmark" level="class"/> <check-missing-javadocs dir="build/docs/benchmark" level="class"/>
<!-- core: problems --> <check-missing-javadocs dir="build/docs/core" level="class"/>
<check-missing-javadocs dir="build/docs/demo" level="class"/> <check-missing-javadocs dir="build/docs/demo" level="class"/>
<check-missing-javadocs dir="build/docs/facet" level="class"/> <check-missing-javadocs dir="build/docs/facet" level="class"/>
<!-- grouping: problems --> <check-missing-javadocs dir="build/docs/grouping" level="class"/>
<!-- highlighter: problems --> <!-- highlighter: problems -->
<check-missing-javadocs dir="build/docs/join" level="class"/> <check-missing-javadocs dir="build/docs/join" level="class"/>
<check-missing-javadocs dir="build/docs/memory" level="class"/> <check-missing-javadocs dir="build/docs/memory" level="class"/>
@ -247,7 +247,7 @@
<check-missing-javadocs dir="build/docs/sandbox" level="class"/> <check-missing-javadocs dir="build/docs/sandbox" level="class"/>
<!-- spatial: problems --> <!-- spatial: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="class"/> <check-missing-javadocs dir="build/docs/suggest" level="class"/>
<!-- test-framework: problems --> <check-missing-javadocs dir="build/docs/test-framework" level="class"/>
</sequential> </sequential>
</target> </target>

View File

@ -26,11 +26,15 @@ import org.apache.lucene.index.TermState;
* terms dict. * terms dict.
*/ */
public class BlockTermState extends OrdTermState { public class BlockTermState extends OrdTermState {
public int docFreq; // how many docs have this term /** how many docs have this term */
public long totalTermFreq; // total number of occurrences of this term public int docFreq;
/** total number of occurrences of this term */
public long totalTermFreq;
public int termBlockOrd; // the term's ord in the current block /** the term's ord in the current block */
public long blockFilePointer; // fp into the terms dict primary file (_X.tim) that holds this term public int termBlockOrd;
/** fp into the terms dict primary file (_X.tim) that holds this term */
public long blockFilePointer;
@Override @Override
public void copyFrom(TermState _other) { public void copyFrom(TermState _other) {

View File

@ -36,7 +36,7 @@ import org.apache.lucene.util.MathUtil;
*/ */
public abstract class MultiLevelSkipListReader { public abstract class MultiLevelSkipListReader {
// the maximum number of skip levels possible for this index /** the maximum number of skip levels possible for this index */
protected int maxNumberOfSkipLevels; protected int maxNumberOfSkipLevels;
// number of levels in this skip list // number of levels in this skip list

View File

@ -52,7 +52,7 @@ import org.apache.lucene.util.MathUtil;
*/ */
public abstract class MultiLevelSkipListWriter { public abstract class MultiLevelSkipListWriter {
// number of levels in this skip list /** number of levels in this skip list */
protected int numberOfSkipLevels; protected int numberOfSkipLevels;
// the skip interval in the list with level = 0 // the skip interval in the list with level = 0
@ -93,8 +93,8 @@ public abstract class MultiLevelSkipListWriter {
} }
} }
/** creates new buffers or empties the existing ones */
protected void resetSkip() { protected void resetSkip() {
// creates new buffers or empties the existing ones
if (skipBuffer == null) { if (skipBuffer == null) {
init(); init();
} else { } else {

View File

@ -1796,7 +1796,7 @@ public class DirectPostingsFormat extends PostingsFormat {
} }
// Docs + freqs: // Docs + freqs:
public final static class HighFreqDocsEnum extends DocsEnum { private final static class HighFreqDocsEnum extends DocsEnum {
private int[] docIDs; private int[] docIDs;
private int[] freqs; private int[] freqs;
private final Bits liveDocs; private final Bits liveDocs;
@ -1969,7 +1969,7 @@ public class DirectPostingsFormat extends PostingsFormat {
} }
// TODO: specialize offsets and not // TODO: specialize offsets and not
public final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum { private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
private int[] docIDs; private int[] docIDs;
private int[] freqs; private int[] freqs;
private int[][] positions; private int[][] positions;

View File

@ -36,7 +36,7 @@ public abstract class IntIndexInput implements Closeable {
public abstract Index index() throws IOException; public abstract Index index() throws IOException;
// TODO: -- can we simplify this? /** Records a single skip-point in the {@link IntIndexInput.Reader}. */
public abstract static class Index { public abstract static class Index {
public abstract void read(DataInput indexIn, boolean absolute) throws IOException; public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
@ -50,6 +50,7 @@ public abstract class IntIndexInput implements Closeable {
public abstract Index clone(); public abstract Index clone();
} }
/** Reads int values. */
public abstract static class Reader { public abstract static class Reader {
/** Reads next single int */ /** Reads next single int */

View File

@ -38,6 +38,7 @@ public abstract class IntIndexOutput implements Closeable {
* >= 0. */ * >= 0. */
public abstract void write(int v) throws IOException; public abstract void write(int v) throws IOException;
/** Records a single skip-point in the IndexOutput. */
public abstract static class Index { public abstract static class Index {
/** Internally records the current location */ /** Internally records the current location */

View File

@ -22,8 +22,15 @@ import org.apache.lucene.store.IOContext;
import java.io.IOException; import java.io.IOException;
/** @lucene.experimental */ /** Provides int reader and writer to specified files.
*
* @lucene.experimental */
public abstract class IntStreamFactory { public abstract class IntStreamFactory {
/** Create an {@link IntIndexInput} on the provided
* fileName. */
public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException; public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException;
/** Create an {@link IntIndexOutput} on the provided
* fileName. */
public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException; public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException;
} }

View File

@ -119,10 +119,13 @@ public class DocTermOrds {
protected final String field; protected final String field;
protected int numTermsInField; protected int numTermsInField;
protected long termInstances; // total number of references to term numbers /** total number of references to term numbers */
protected long termInstances;
private long memsz; private long memsz;
protected int total_time; // total time to uninvert the field /** total time to uninvert the field */
protected int phase1_time; // time for phase1 of the uninvert process protected int total_time;
/** time for phase1 of the uninvert process */
protected int phase1_time;
protected int[] index; protected int[] index;
protected byte[][] tnums = new byte[256][]; protected byte[][] tnums = new byte[256][];
@ -234,7 +237,7 @@ public class DocTermOrds {
protected void setActualDocFreq(int termNum, int df) throws IOException { protected void setActualDocFreq(int termNum, int df) throws IOException {
} }
// Call this only once (if you subclass!) /** Call this only once (if you subclass!) */
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException { protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();

View File

@ -267,11 +267,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType); return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
} }
// NOTE: this method does not carry over termVector /** NOTE: this method does not carry over termVector
// booleans nor docValuesType; the indexer chain * booleans nor docValuesType; the indexer chain
// (TermVectorsConsumerPerField, DocFieldProcessor) must * (TermVectorsConsumerPerField, DocFieldProcessor) must
// set these fields when they succeed in consuming * set these fields when they succeed in consuming
// the document: * the document */
public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) { public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) {
// TODO: really, indexer shouldn't even call this // TODO: really, indexer shouldn't even call this
// method (it's only called from DocFieldProcessor); // method (it's only called from DocFieldProcessor);

View File

@ -243,6 +243,10 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
} }
} }
/** Thrown when a merge was explicitly aborted because
* {@link IndexWriter#close(boolean)} was called with
* <code>false</code>. Normally this exception is
* privately caught and suppressed by {@link IndexWriter}. */
public static class MergeAbortedException extends IOException { public static class MergeAbortedException extends IOException {
public MergeAbortedException() { public MergeAbortedException() {
super("merge is aborted"); super("merge is aborted");

View File

@ -29,6 +29,9 @@ import org.apache.lucene.util.packed.PackedInts;
* @lucene.experimental */ * @lucene.experimental */
public class MergeState { public class MergeState {
/**
* Remaps docids around deletes during merge
*/
public static abstract class DocMap { public static abstract class DocMap {
private final Bits liveDocs; private final Bits liveDocs;
@ -197,6 +200,9 @@ public class MergeState {
public SegmentReader[] matchingSegmentReaders; public SegmentReader[] matchingSegmentReaders;
public int matchedCount; public int matchedCount;
/**
* Class for recording units of work when merging segments.
*/
public static class CheckAbort { public static class CheckAbort {
private double workCount; private double workCount;
private final MergePolicy.OneMerge merge; private final MergePolicy.OneMerge merge;

View File

@ -43,7 +43,7 @@ import org.apache.lucene.util.packed.PackedInts.Reader;
* @lucene.experimental * @lucene.experimental
* @lucene.internal * @lucene.internal
*/ */
public class MultiDocValues extends DocValues { class MultiDocValues extends DocValues {
private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller(); private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller();
private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() { private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() {

View File

@ -143,6 +143,8 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
} }
// TODO: implement bulk read more efficiently than super // TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsAndPositionsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice { public final static class EnumWithSlice {
public DocsAndPositionsEnum docsAndPositionsEnum; public DocsAndPositionsEnum docsAndPositionsEnum;
public ReaderSlice slice; public ReaderSlice slice;

View File

@ -122,6 +122,8 @@ public final class MultiDocsEnum extends DocsEnum {
} }
// TODO: implement bulk read more efficiently than super // TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice { public final static class EnumWithSlice {
public DocsEnum docsEnum; public DocsEnum docsEnum;
public ReaderSlice slice; public ReaderSlice slice;

View File

@ -133,7 +133,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
// or wrote; this is normally the same as generation except if // or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit // there was an IOException that had interrupted a commit
public Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit /** Opaque Map&lt;String, String&gt; that user can specify during IndexWriter.commit */
public Map<String,String> userData = Collections.<String,String>emptyMap();
private List<SegmentInfoPerCommit> segments = new ArrayList<SegmentInfoPerCommit>(); private List<SegmentInfoPerCommit> segments = new ArrayList<SegmentInfoPerCommit>();

View File

@ -30,11 +30,11 @@ public class SegmentReadState {
public final FieldInfos fieldInfos; public final FieldInfos fieldInfos;
public final IOContext context; public final IOContext context;
// NOTE: if this is < 0, that means "defer terms index /** NOTE: if this is &lt; 0, that means "defer terms index
// load until needed". But if the codec must load the * load until needed". But if the codec must load the
// terms index on init (preflex is the only once currently * terms index on init (preflex is the only once currently
// that must do so), then it should negate this value to * that must do so), then it should negate this value to
// get the app's terms divisor: * get the app's terms divisor */
public int termsIndexDivisor; public int termsIndexDivisor;
public final String segmentSuffix; public final String segmentSuffix;

View File

@ -33,11 +33,11 @@ public class SegmentWriteState {
public final FieldInfos fieldInfos; public final FieldInfos fieldInfos;
public int delCountOnFlush; public int delCountOnFlush;
// Deletes to apply while we are flushing the segment. A /** Deletes to apply while we are flushing the segment. A
// Term is enrolled in here if it was deleted at one * Term is enrolled in here if it was deleted at one
// point, and it's mapped to the docIDUpto, meaning any * point, and it's mapped to the docIDUpto, meaning any
// docID < docIDUpto containing this term should be * docID &lt; docIDUpto containing this term should be
// deleted. * deleted. */
public final BufferedDeletes segDeletes; public final BufferedDeletes segDeletes;
// Lazily created: // Lazily created:

View File

@ -32,6 +32,9 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
/** /**
* Utility class for merging SortedBytes DocValues
* instances.
*
* @lucene.internal * @lucene.internal
*/ */
public final class SortedBytesMergeUtils { public final class SortedBytesMergeUtils {
@ -54,7 +57,14 @@ public final class SortedBytesMergeUtils {
} }
return new MergeContext(comp, mergeDocCount, size, type); return new MergeContext(comp, mergeDocCount, size, type);
} }
/**
* Encapsulates contextual information about the merge.
* This class holds document id to ordinal mappings, offsets for
* variable length values and the comparator to sort the merged
* bytes.
*
* @lucene.internal
*/
public static final class MergeContext { public static final class MergeContext {
private final Comparator<BytesRef> comp; private final Comparator<BytesRef> comp;
private final BytesRef missingValue = new BytesRef(); private final BytesRef missingValue = new BytesRef();
@ -169,10 +179,36 @@ public final class SortedBytesMergeUtils {
return merger.currentOrd; return merger.currentOrd;
} }
/**
* Implementations of this interface consume the merged bytes with their
* corresponding ordinal and byte offset. The offset is the byte offset in
* the target sorted source where the currently merged {@link BytesRef} instance
* should be stored.
*/
public static interface BytesRefConsumer { public static interface BytesRefConsumer {
/**
* Consumes a single {@link BytesRef}. The provided {@link BytesRef}
* instances are strictly increasing with respect to the
* {@link Comparator} used for merging.
*
* @param ref
* the {@link BytesRef} to consume
* @param ord
* the ordinal of the given {@link BytesRef} in the merge target
* @param offset
* the byte offset of the given {@link BytesRef} in the merge
* target
* @throws IOException
* if an {@link IOException} occurs
*/
public void consume(BytesRef ref, int ord, long offset) throws IOException; public void consume(BytesRef ref, int ord, long offset) throws IOException;
} }
/**
* A simple {@link BytesRefConsumer} that writes the merged {@link BytesRef}
* instances sequentially to an {@link IndexOutput}.
*/
public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer { public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
private final IndexOutput datOut; private final IndexOutput datOut;
@ -187,6 +223,14 @@ public final class SortedBytesMergeUtils {
} }
} }
/**
* {@link RecordMerger} merges a list of {@link SortedSourceSlice} lazily by
* consuming the sorted source records one by one and de-duplicates records
* that are shared across slices. The algorithm is based on a lazy priority queue
* that prevents reading merge sources into heap memory.
*
* @lucene.internal
*/
private static final class RecordMerger { private static final class RecordMerger {
private final MergeQueue queue; private final MergeQueue queue;
private final SortedSourceSlice[] top; private final SortedSourceSlice[] top;
@ -231,6 +275,12 @@ public final class SortedBytesMergeUtils {
} }
} }
/**
* {@link SortedSourceSlice} represents a single {@link SortedSource} merge candidate.
* It encapsulates ordinal and pre-calculated target doc id to ordinal mappings.
* This class also holds state private to the merge process.
* @lucene.internal
*/
public static class SortedSourceSlice { public static class SortedSourceSlice {
final SortedSource source; final SortedSource source;
final int readerIdx; final int readerIdx;

View File

@ -45,6 +45,9 @@ import org.apache.lucene.util.packed.PackedInts;
*/ */
public interface FieldCache { public interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
*/
public static final class CreationPlaceholder { public static final class CreationPlaceholder {
Object value; Object value;
} }

View File

@ -194,6 +194,9 @@ public abstract class FieldComparator<T> {
* than the provided value. */ * than the provided value. */
public abstract int compareDocToValue(int doc, T value) throws IOException; public abstract int compareDocToValue(int doc, T value) throws IOException;
/**
* Base FieldComparator class for numeric types
*/
public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> { public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
protected final T missingValue; protected final T missingValue;
protected final String field; protected final String field;

View File

@ -33,6 +33,10 @@ import org.apache.lucene.util.PriorityQueue;
*/ */
public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> { public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {
/**
* Extension of ScoreDoc to also store the
* {@link FieldComparator} slot.
*/
public static class Entry extends ScoreDoc { public static class Entry extends ScoreDoc {
public int slot; public int slot;

View File

@ -398,12 +398,17 @@ public class FuzzyTermsEnum extends TermsEnum {
return scale_factor; return scale_factor;
} }
/** @lucene.internal */ /**
* reuses compiled automata across different segments,
* because they are independent of the index
* @lucene.internal */
public static interface LevenshteinAutomataAttribute extends Attribute { public static interface LevenshteinAutomataAttribute extends Attribute {
public List<CompiledAutomaton> automata(); public List<CompiledAutomaton> automata();
} }
/** @lucene.internal */ /**
* Stores compiled automata as a list (indexed by edit distance)
* @lucene.internal */
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute { public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
private final List<CompiledAutomaton> automata = new ArrayList<CompiledAutomaton>(); private final List<CompiledAutomaton> automata = new ArrayList<CompiledAutomaton>();

View File

@ -82,7 +82,7 @@ public class IndexSearcher {
// in the next release // in the next release
protected final IndexReaderContext readerContext; protected final IndexReaderContext readerContext;
protected final List<AtomicReaderContext> leafContexts; protected final List<AtomicReaderContext> leafContexts;
// used with executor - each slice holds a set of leafs executed within one thread /** used with executor - each slice holds a set of leafs executed within one thread */
protected final LeafSlice[] leafSlices; protected final LeafSlice[] leafSlices;
// These are only used for multi-threaded search // These are only used for multi-threaded search

View File

@ -32,7 +32,11 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
/** @lucene.internal Only public to be accessible by spans package. */ /**
* Base rewrite method that translates each term into a query, and keeps
* the scores as computed by the query.
* <p>
* @lucene.internal Only public to be accessible by spans package. */
public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q> { public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q> {
/** A rewrite method that first translates each term into /** A rewrite method that first translates each term into

View File

@ -38,6 +38,9 @@ import org.apache.lucene.util.StringHelper;
*/ */
public class SortField { public class SortField {
/**
* Specifies the type of the terms to be sorted, or special types such as CUSTOM
*/
public static enum Type { public static enum Type {
/** Sort by document score (relevance). Sort values are Float and higher /** Sort by document score (relevance). Sort values are Float and higher

View File

@ -225,6 +225,8 @@ public class TimeLimitingCollector extends Collector {
} }
/** /**
* Thread used to timeout search requests.
* Can be stopped completely with {@link TimerThread#stopTimer()}
* @lucene.experimental * @lucene.experimental
*/ */
public static final class TimerThread extends Thread { public static final class TimerThread extends Thread {

View File

@ -33,8 +33,8 @@ import org.apache.lucene.util.PriorityQueue;
*/ */
public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector { public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector {
// This is used in case topDocs() is called with illegal parameters, or there /** This is used in case topDocs() is called with illegal parameters, or there
// simply aren't (enough) results. * simply aren't (enough) results. */
protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN); protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN);
/** /**

View File

@ -436,6 +436,9 @@ public abstract class FSDirectory extends Directory {
return chunkSize; return chunkSize;
} }
/**
* Writes output with {@link RandomAccessFile#write(byte[], int, int)}
*/
protected static class FSIndexOutput extends BufferedIndexOutput { protected static class FSIndexOutput extends BufferedIndexOutput {
private final FSDirectory parent; private final FSDirectory parent;
private final String name; private final String name;

View File

@ -106,6 +106,9 @@ public class NIOFSDirectory extends FSDirectory {
}; };
} }
/**
* Reads bytes with {@link FileChannel#read(ByteBuffer, long)}
*/
protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
private ByteBuffer byteBuf; // wraps the buffer for NIO private ByteBuffer byteBuf; // wraps the buffer for NIO

View File

@ -19,7 +19,9 @@ package org.apache.lucene.store;
import java.util.ArrayList; import java.util.ArrayList;
/** @lucene.internal */ /**
* Represents a file in RAM as a list of byte[] buffers.
* @lucene.internal */
public class RAMFile { public class RAMFile {
protected ArrayList<byte[]> buffers = new ArrayList<byte[]>(); protected ArrayList<byte[]> buffers = new ArrayList<byte[]>();
long length; long length;

View File

@ -85,8 +85,16 @@ public class SimpleFSDirectory extends FSDirectory {
}; };
} }
/**
* Reads bytes with {@link RandomAccessFile#seek(long)} followed by
* {@link RandomAccessFile#read(byte[], int, int)}.
*/
protected static class SimpleFSIndexInput extends BufferedIndexInput { protected static class SimpleFSIndexInput extends BufferedIndexInput {
/**
* Extension of RandomAccessFile that tracks if the file is
* open.
*/
protected static class Descriptor extends RandomAccessFile { protected static class Descriptor extends RandomAccessFile {
// remember if the file is open, so that we don't try to close it // remember if the file is open, so that we don't try to close it
// more than once // more than once

View File

@ -117,10 +117,13 @@ public final class ByteBlockPool {
public byte[][] buffers = new byte[10][]; public byte[][] buffers = new byte[10][];
int bufferUpto = -1; // Which buffer we are upto int bufferUpto = -1; // Which buffer we are upto
public int byteUpto = BYTE_BLOCK_SIZE; // Where we are in head buffer /** Where we are in head buffer */
public int byteUpto = BYTE_BLOCK_SIZE;
public byte[] buffer; // Current head buffer /** Current head buffer */
public int byteOffset = -BYTE_BLOCK_SIZE; // Current head offset public byte[] buffer;
/** Current head offset */
public int byteOffset = -BYTE_BLOCK_SIZE;
private final Allocator allocator; private final Allocator allocator;

View File

@ -48,6 +48,11 @@ public class FuzzySet {
public static final int FUZZY_SERIALIZATION_VERSION=1; public static final int FUZZY_SERIALIZATION_VERSION=1;
/**
* Result from {@link FuzzySet#contains(BytesRef)}:
* can never return definitively YES (always MAYBE),
* but can sometimes definitely return NO.
*/
public enum ContainsResult { public enum ContainsResult {
MAYBE, NO MAYBE, NO
}; };

View File

@ -158,7 +158,7 @@ public final class FST<T> {
private final boolean packed; private final boolean packed;
private PackedInts.Reader nodeRefToAddress; private PackedInts.Reader nodeRefToAddress;
// If arc has this label then that arc is final/accepted /** If arc has this label then that arc is final/accepted */
public static final int END_LABEL = -1; public static final int END_LABEL = -1;
private boolean allowArrayArcs = true; private boolean allowArrayArcs = true;
@ -174,7 +174,7 @@ public final class FST<T> {
// building an FST w/ willPackFST=true: // building an FST w/ willPackFST=true:
int node; int node;
// To node (ord or address): /** To node (ord or address) */
public int target; public int target;
byte flags; byte flags;
@ -542,8 +542,8 @@ public final class FST<T> {
return v; return v;
} }
// returns true if the node at this address has any /** returns true if the node at this address has any
// outgoing arcs * outgoing arcs */
public static<T> boolean targetHasArcs(Arc<T> arc) { public static<T> boolean targetHasArcs(Arc<T> arc) {
return arc.target > 0; return arc.target > 0;
} }

View File

@ -767,6 +767,19 @@ public final class Util {
} }
} }
/** Copies every UTF-16 code unit (char) of <code>s</code> into
 * <code>scratch</code>, one int per char, and returns
 * <code>scratch</code>. Chars are copied individually by index;
 * surrogate pairs are not combined into code points. */
public static IntsRef toUTF16(CharSequence s, IntsRef scratch) {
  final int numChars = s.length();
  // The scratch always starts at 0 and spans exactly one int per char.
  scratch.offset = 0;
  scratch.length = numChars;
  scratch.grow(numChars);
  int upto = 0;
  while (upto < numChars) {
    // char widens implicitly to int; no explicit cast required
    scratch.ints[upto] = s.charAt(upto);
    upto++;
  }
  return scratch;
}
/** Decodes the Unicode codepoints from the provided /** Decodes the Unicode codepoints from the provided
* CharSequence and places them in the provided scratch * CharSequence and places them in the provided scratch
* IntsRef, which must not be null, returning it. */ * IntsRef, which must not be null, returning it. */

View File

@ -734,7 +734,7 @@ public class PackedInts {
} }
return new Packed64(in, valueCount, bitsPerValue); return new Packed64(in, valueCount, bitsPerValue);
default: default:
throw new AssertionError("Unknwown Writer format: " + format); throw new AssertionError("Unknown Writer format: " + format);
} }
} }

View File

@ -20,12 +20,13 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.Norm; import org.apache.lucene.index.Norm;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
@ -36,7 +37,7 @@ import org.apache.lucene.util.LuceneTestCase;
public class TestSimilarityProvider extends LuceneTestCase { public class TestSimilarityProvider extends LuceneTestCase {
private Directory directory; private Directory directory;
private IndexReader reader; private DirectoryReader reader;
private IndexSearcher searcher; private IndexSearcher searcher;
@Override @Override
@ -75,8 +76,9 @@ public class TestSimilarityProvider extends LuceneTestCase {
public void testBasics() throws Exception { public void testBasics() throws Exception {
// sanity check of norms writer // sanity check of norms writer
// TODO: generalize // TODO: generalize
byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray(); AtomicReader slow = new SlowCompositeReaderWrapper(reader);
byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray(); byte fooNorms[] = (byte[]) slow.normValues("foo").getSource().getArray();
byte barNorms[] = (byte[]) slow.normValues("bar").getSource().getArray();
for (int i = 0; i < fooNorms.length; i++) { for (int i = 0; i < fooNorms.length; i++) {
assertFalse(fooNorms[i] == barNorms[i]); assertFalse(fooNorms[i] == barNorms[i]);
} }

View File

@ -0,0 +1,49 @@
package org.apache.lucene.util.junitcompat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import org.apache.lucene.util._TestUtil;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
/**
 * Checks that a temporary directory created by a test is still present
 * on disk after that test has failed.
 */
public class TestLeaveFilesIfTestFails extends WithNestedTests {
  public TestLeaveFilesIfTestFails() {
    super(true);
  }

  /**
   * Nested test that creates a temporary directory and then fails on
   * purpose; the created directory is published through {@link #file}.
   */
  public static class Nested1 extends WithNestedTests.AbstractNestedTest {
    /** The temporary directory created by {@link #testDummy()}. */
    static File file;

    public void testDummy() {
      final File leftover = _TestUtil.getTempDir("leftover");
      file = leftover;
      leftover.mkdirs();
      // deliberate failure: the outer test expects exactly one
      fail();
    }
  }

  @Test
  public void testLeaveFilesIfTestFails() {
    final Result result = JUnitCore.runClasses(Nested1.class);
    final int failures = result.getFailureCount();
    Assert.assertEquals(1, failures);

    // the directory must have survived the failed nested run
    final File leftover = Nested1.file;
    Assert.assertTrue(leftover.exists());
    // clean up manually so this test leaves nothing behind
    leftover.delete();
  }
}

View File

@ -44,6 +44,10 @@ public abstract class AbstractDistinctValuesCollector<GC extends AbstractDistinc
public void setScorer(Scorer scorer) throws IOException { public void setScorer(Scorer scorer) throws IOException {
} }
/**
* Returned by {@link AbstractDistinctValuesCollector#getGroups()},
* representing the value and set of distinct values for the group.
*/
public abstract static class GroupCount<GROUP_VALUE_TYPE> { public abstract static class GroupCount<GROUP_VALUE_TYPE> {
public final GROUP_VALUE_TYPE groupValue; public final GROUP_VALUE_TYPE groupValue;

View File

@ -17,7 +17,12 @@
package org.apache.lucene.search.grouping; package org.apache.lucene.search.grouping;
/** @lucene.internal */ import org.apache.lucene.search.FieldComparator; // javadocs
/**
* Expert: representation of a group in {@link AbstractFirstPassGroupingCollector},
* tracking the top doc and {@link FieldComparator} slot.
* @lucene.internal */
public class CollectedSearchGroup<T> extends SearchGroup<T> { public class CollectedSearchGroup<T> extends SearchGroup<T> {
int topDoc; int topDoc;
int comparatorSlot; int comparatorSlot;

View File

@ -90,6 +90,28 @@
</sequential> </sequential>
</macrodef> </macrodef>
<!-- Location of the test-framework module jar. -->
<property name="test-framework.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}.jar"/>
<!-- Up-to-date check for the test-framework jar; skipped once test-framework.uptodate is set.
     NOTE(review): presumably module-uptodate sets the named property when the jar is current;
     confirm against the macrodef. -->
<target name="check-test-framework-uptodate" unless="test-framework.uptodate">
<module-uptodate name="test-framework" jarfile="${test-framework.jar}" property="test-framework.uptodate"/>
</target>
<!-- Builds the test-framework jar by invoking the module's own jar-core target,
     unless test-framework.uptodate is already set. -->
<target name="jar-test-framework" unless="test-framework.uptodate" depends="check-test-framework-uptodate">
<ant dir="${common.dir}/test-framework" target="jar-core" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<!-- Mark the module as built so this target is skipped on later invocations in this build. -->
<property name="test-framework.uptodate" value="true"/>
</target>
<!-- Location of the test-framework module javadoc jar. -->
<property name="test-framework-javadoc.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}-javadoc.jar"/>
<!-- Up-to-date check for the test-framework javadoc jar; skipped once
     test-framework-javadocs.uptodate is set.
     NOTE(review): presumably module-uptodate sets the named property when the jar is current;
     confirm against the macrodef. -->
<target name="check-test-framework-javadocs-uptodate" unless="test-framework-javadocs.uptodate">
<module-uptodate name="test-framework" jarfile="${test-framework-javadoc.jar}" property="test-framework-javadocs.uptodate"/>
</target>
<!-- Generates the test-framework javadocs by invoking the module's own javadocs target,
     unless test-framework-javadocs.uptodate is already set. -->
<target name="javadocs-test-framework" unless="test-framework-javadocs.uptodate" depends="check-test-framework-javadocs-uptodate">
<ant dir="${common.dir}/test-framework" target="javadocs" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<!-- Mark the javadocs as built so this target is skipped on later invocations in this build. -->
<property name="test-framework-javadocs.uptodate" value="true"/>
</target>
<property name="queryparser.jar" value="${common.dir}/build/queryparser/lucene-queryparser-${version}.jar"/> <property name="queryparser.jar" value="${common.dir}/build/queryparser/lucene-queryparser-${version}.jar"/>
<target name="check-queryparser-uptodate" unless="queryparser.uptodate"> <target name="check-queryparser-uptodate" unless="queryparser.uptodate">
<module-uptodate name="queryparser" jarfile="${queryparser.jar}" property="queryparser.uptodate"/> <module-uptodate name="queryparser" jarfile="${queryparser.jar}" property="queryparser.uptodate"/>

View File

@ -24,6 +24,9 @@ import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
/**
* Function that returns a constant double value for every document.
*/
public class DoubleConstValueSource extends ConstNumberSource { public class DoubleConstValueSource extends ConstNumberSource {
final double constant; final double constant;
private final float fv; private final float fv;

View File

@ -28,7 +28,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
/** @lucene.internal */ /**
* Function that returns {@link TFIDFSimilarity #idf(long, long)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class IDFValueSource extends DocFreqValueSource { public class IDFValueSource extends DocFreqValueSource {
public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) { public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes); super(field, val, indexedField, indexedBytes);

View File

@ -30,6 +30,10 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Depending on the boolean value of the <code>ifSource</code> function,
* returns the value of the <code>trueSource</code> or <code>falseSource</code> function.
*/
public class IfFunction extends BoolFunction { public class IfFunction extends BoolFunction {
private final ValueSource ifSource; private final ValueSource ifSource;
private final ValueSource trueSource; private final ValueSource trueSource;

View File

@ -17,6 +17,7 @@
package org.apache.lucene.queries.function.valuesource; package org.apache.lucene.queries.function.valuesource;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
@ -24,6 +25,11 @@ import org.apache.lucene.search.IndexSearcher;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
/**
* Returns the value of {@link IndexReader#maxDoc()}
* for every document. This is the number of documents
* including deletions.
*/
public class MaxDocValueSource extends ValueSource { public class MaxDocValueSource extends ValueSource {
public String name() { public String name() {
return "maxdoc"; return "maxdoc";

View File

@ -28,6 +28,13 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#decodeNormValue(byte)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class NormValueSource extends ValueSource { public class NormValueSource extends ValueSource {
protected final String field; protected final String field;
public NormValueSource(String field) { public NormValueSource(String field) {

View File

@ -30,7 +30,10 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
/** /**
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments). * <code>SumTotalTermFreqValueSource</code> returns the number of tokens.
* (sum of term freqs across all documents, across all terms).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal * @lucene.internal
*/ */
public class SumTotalTermFreqValueSource extends ValueSource { public class SumTotalTermFreqValueSource extends ValueSource {

View File

@ -28,6 +28,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#tf(int)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class TFValueSource extends TermFreqValueSource { public class TFValueSource extends TermFreqValueSource {
public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) { public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes); super(field, val, indexedField, indexedBytes);

View File

@ -26,6 +26,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
/**
* Function that returns {@link DocsEnum#freq()} for the
* supplied term in every document.
* <p>
* If the term does not exist in the document, returns 0.
* If frequencies are omitted, returns 1.
*/
public class TermFreqValueSource extends DocFreqValueSource { public class TermFreqValueSource extends DocFreqValueSource {
public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) { public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes); super(field, val, indexedField, indexedBytes);

View File

@ -28,7 +28,10 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
/** /**
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments). * <code>TotalTermFreqValueSource</code> returns the total term freq
* (sum of term freqs across all documents).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal * @lucene.internal
*/ */
public class TotalTermFreqValueSource extends ValueSource { public class TotalTermFreqValueSource extends ValueSource {

View File

@ -29,7 +29,9 @@ import org.apache.lucene.util._TestUtil;
// a MockRemovesTokensTF, ideally subclassing FilteringTF // a MockRemovesTokensTF, ideally subclassing FilteringTF
// (in modules/analysis) // (in modules/analysis)
// Randomly injects holes: /**
* Randomly injects holes (similar to what a stopfilter would do)
*/
public final class MockHoleInjectingTokenFilter extends TokenFilter { public final class MockHoleInjectingTokenFilter extends TokenFilter {
private final long randomSeed; private final long randomSeed;

View File

@ -27,8 +27,9 @@ import java.io.Reader;
/** /**
* * Wraps a whitespace tokenizer with a filter that sets
* * the first token, and odd tokens to posinc=1, and all others
* to 0, encoding the position as pos: XXX in the payload.
**/ **/
public final class MockPayloadAnalyzer extends Analyzer { public final class MockPayloadAnalyzer extends Analyzer {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermsIndexReaderBase; import org.apache.lucene.codecs.TermsIndexReaderBase;
import org.apache.lucene.codecs.TermsIndexWriterBase; import org.apache.lucene.codecs.TermsIndexWriterBase;
import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs
import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader; import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
@ -39,6 +40,10 @@ import org.apache.lucene.util.BytesRef;
// TODO: we could make separate base class that can wrap // TODO: we could make separate base class that can wrap
// any PostingsBaseFormat and make it ord-able... // any PostingsBaseFormat and make it ord-able...
/**
* Customized version of {@link Lucene40Codec} that uses
* {@link FixedGapTermsIndexWriter}.
*/
public class Lucene40WithOrds extends PostingsFormat { public class Lucene40WithOrds extends PostingsFormat {
public Lucene40WithOrds() { public Lucene40WithOrds() {

View File

@ -72,6 +72,9 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat {
return new MockIntFactory(blockSize); return new MockIntFactory(blockSize);
} }
/**
* Encodes blocks as vInts of a fixed block size.
*/
public static class MockIntFactory extends IntStreamFactory { public static class MockIntFactory extends IntStreamFactory {
private final int blockSize; private final int blockSize;

View File

@ -70,6 +70,10 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat {
return getName() + "(baseBlockSize="+ baseBlockSize + ")"; return getName() + "(baseBlockSize="+ baseBlockSize + ")";
} }
/**
* If the first value is &lt;= 3, writes baseBlockSize vInts at once,
* otherwise writes 2*baseBlockSize vInts.
*/
public static class MockIntFactory extends IntStreamFactory { public static class MockIntFactory extends IntStreamFactory {
private final int baseBlockSize; private final int baseBlockSize;

View File

@ -25,7 +25,10 @@ import org.apache.lucene.codecs.sep.IntStreamFactory;
import java.io.IOException; import java.io.IOException;
/** @lucene.experimental */ /**
* Encodes ints directly as vInts with {@link MockSingleIntIndexOutput}
* @lucene.experimental
*/
public class MockSingleIntFactory extends IntStreamFactory { public class MockSingleIntFactory extends IntStreamFactory {
@Override @Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {

View File

@ -28,7 +28,7 @@ import org.apache.lucene.store.IndexInput;
/** Reads IndexInputs written with {@link /** Reads IndexInputs written with {@link
* MockSingleIntIndexOutput}. NOTE: this class is just for * MockSingleIntIndexOutput}. NOTE: this class is just for
* demonstration puprposes (it is a very slow way to read a * demonstration purposes (it is a very slow way to read a
* block of ints). * block of ints).
* *
* @lucene.experimental * @lucene.experimental
@ -54,6 +54,9 @@ public class MockSingleIntIndexInput extends IntIndexInput {
in.close(); in.close();
} }
/**
* Just reads a vInt directly from the file.
*/
public static class Reader extends IntIndexInput.Reader { public static class Reader extends IntIndexInput.Reader {
// clone: // clone:
private final IndexInput in; private final IndexInput in;

View File

@ -68,7 +68,7 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
return info.sizeInBytes(); return info.sizeInBytes();
} }
public static enum Drink { private static enum Drink {
Beer(15), Wine(17), Champagne(21), WhiteRussian(22), SingleMalt(30); Beer(15), Wine(17), Champagne(21), WhiteRussian(22), SingleMalt(30);
@ -77,11 +77,6 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
Drink(long drunkFactor) { Drink(long drunkFactor) {
this.drunkFactor = drunkFactor; this.drunkFactor = drunkFactor;
} }
public long drunk() {
return drunkFactor;
}
} }
} }

View File

@ -23,6 +23,10 @@ import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Set; import java.util.Set;
/**
* A {@link FilterAtomicReader} that exposes only a subset
* of fields from the underlying wrapped reader.
*/
public final class FieldFilterAtomicReader extends FilterAtomicReader { public final class FieldFilterAtomicReader extends FilterAtomicReader {
private final Set<String> fields; private final Set<String> fields;

View File

@ -42,9 +42,15 @@ import org.apache.lucene.util._TestUtil;
// - doc blocks? so we can test joins/grouping... // - doc blocks? so we can test joins/grouping...
// - controlled consistency (NRTMgr) // - controlled consistency (NRTMgr)
/**
* Base test class for simulating distributed search across multiple shards.
*/
public abstract class ShardSearchingTestBase extends LuceneTestCase { public abstract class ShardSearchingTestBase extends LuceneTestCase {
// TODO: maybe SLM should throw this instead of returning null... // TODO: maybe SLM should throw this instead of returning null...
/**
* Thrown when the lease for a searcher has expired.
*/
public static class SearcherExpiredException extends RuntimeException { public static class SearcherExpiredException extends RuntimeException {
public SearcherExpiredException(String message) { public SearcherExpiredException(String message) {
super(message); super(message);
@ -604,6 +610,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
} }
} }
/**
* An IndexSearcher and associated version (lease)
*/
protected static class SearcherAndVersion { protected static class SearcherAndVersion {
public final IndexSearcher searcher; public final IndexSearcher searcher;
public final long version; public final long version;

View File

@ -146,6 +146,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
preventDoubleWrite = value; preventDoubleWrite = value;
} }
/**
* Enum for controlling hard disk throttling.
* Set via {@link MockDirectoryWrapper #setThrottling(Throttling)}
* <p>
* WARNING: can make tests very slow.
*/
public static enum Throttling { public static enum Throttling {
/** always emulate a slow hard disk. could be very slow! */ /** always emulate a slow hard disk. could be very slow! */
ALWAYS, ALWAYS,

View File

@ -24,13 +24,17 @@ import java.io.*;
*/ */
final class CloseableFile implements Closeable { final class CloseableFile implements Closeable {
private final File file; private final File file;
private final TestRuleMarkFailure failureMarker;
public CloseableFile(File file) { public CloseableFile(File file, TestRuleMarkFailure failureMarker) {
this.file = file; this.file = file;
this.failureMarker = failureMarker;
} }
@Override @Override
public void close() throws IOException { public void close() throws IOException {
// only if there were no other test failures.
if (failureMarker.wasSuccessful()) {
if (file.exists()) { if (file.exists()) {
try { try {
_TestUtil.rmDir(file); _TestUtil.rmDir(file);
@ -46,3 +50,4 @@ final class CloseableFile implements Closeable {
} }
} }
} }
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/ */
/** /**
* Converts numbers to english strings for testing.
* @lucene.internal * @lucene.internal
*/ */
public final class English { public final class English {

View File

@ -26,6 +26,9 @@ package org.apache.lucene.util;
* @lucene.internal */ * @lucene.internal */
public abstract class RollingBuffer<T extends RollingBuffer.Resettable> { public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {
/**
* Implement to reset an instance
*/
public static interface Resettable { public static interface Resettable {
public void reset(); public void reset();
} }

View File

@ -1,10 +1,5 @@
package org.apache.lucene.util; package org.apache.lucene.util;
import org.apache.lucene.search.FieldCache;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -22,6 +17,30 @@ import org.junit.runners.model.Statement;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.FieldCacheSanityChecker; // javadocs
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* This rule will fail the test if it has insane field caches.
* <p>
* calling assertSaneFieldCaches here isn't as useful as having test
* classes call it directly from the scope where the index readers
* are used, because they could be gc'ed just before this tearDown
* method is called.
* <p>
* But it's better than nothing.
* <p>
* If you are testing functionality that you know for a fact
* "violates" FieldCache sanity, then you should either explicitly
* call purgeFieldCache at the end of your test method, or refactor
* your Test class so that the inconsistent FieldCache usages are
* isolated in distinct test methods
*
* @see FieldCacheSanityChecker
*/
public class TestRuleFieldCacheSanity implements TestRule { public class TestRuleFieldCacheSanity implements TestRule {
@Override @Override
@ -33,18 +52,6 @@ public class TestRuleFieldCacheSanity implements TestRule {
Throwable problem = null; Throwable problem = null;
try { try {
// calling assertSaneFieldCaches here isn't as useful as having test
// classes call it directly from the scope where the index readers
// are used, because they could be gc'ed just before this tearDown
// method is called.
//
// But it's better then nothing.
//
// If you are testing functionality that you know for a fact
// "violates" FieldCache sanity, then you should either explicitly
// call purgeFieldCache at the end of your test method, or refactor
// your Test class so that the inconsistent FieldCache usages are
// isolated in distinct test methods
LuceneTestCase.assertSaneFieldCaches(d.getDisplayName()); LuceneTestCase.assertSaneFieldCaches(d.getDisplayName());
} catch (Throwable t) { } catch (Throwable t) {
problem = t; problem = t;

View File

@ -21,6 +21,10 @@ import org.junit.rules.TestRule;
import org.junit.runner.Description; import org.junit.runner.Description;
import org.junit.runners.model.Statement; import org.junit.runners.model.Statement;
/**
* Stores the suite name so you can retrieve it
* from {@link #getTestClass()}
*/
public class TestRuleStoreClassName implements TestRule { public class TestRuleStoreClassName implements TestRule {
private volatile Description description; private volatile Description description;

View File

@ -94,7 +94,7 @@ public class _TestUtil {
try { try {
File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR); File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR);
f.delete(); f.delete();
LuceneTestCase.closeAfterSuite(new CloseableFile(f)); LuceneTestCase.closeAfterSuite(new CloseableFile(f, LuceneTestCase.suiteFailureMarker));
return f; return f;
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
@ -136,7 +136,7 @@ public class _TestUtil {
rmDir(destDir); rmDir(destDir);
destDir.mkdir(); destDir.mkdir();
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir)); LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker));
while (entries.hasMoreElements()) { while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement(); ZipEntry entry = entries.nextElement();

View File

@ -127,6 +127,13 @@ public class AutomatonTestUtil {
return code; return code;
} }
/**
* Lets you retrieve random strings accepted
* by an Automaton.
* <p>
* Once created, call {@link #getRandomAcceptedString(Random)}
* to get a new string (in UTF-32 codepoints).
*/
public static class RandomAcceptedStrings { public static class RandomAcceptedStrings {
private final Map<Transition,Boolean> leadsToAccept; private final Map<Transition,Boolean> leadsToAccept;

View File

@ -26,6 +26,14 @@ $Id$
================== 4.0.0 ================== ================== 4.0.0 ==================
Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.5.0
Velocity 1.6.4 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.3.6
Upgrading from Solr 4.0.0-BETA Upgrading from Solr 4.0.0-BETA
---------------------- ----------------------
@ -63,6 +71,27 @@ Bug Fixes
* SOLR-3649: Fixed bug in JavabinLoader that caused deleteById(List<String> ids) * SOLR-3649: Fixed bug in JavabinLoader that caused deleteById(List<String> ids)
to not work in SolrJ (siren) to not work in SolrJ (siren)
* SOLR-3730: Rollback is not implemented quite right and can cause corner case fails in
SolrCloud tests. (rmuir, Mark Miller)
* SOLR-2981: Fixed StatsComponent to no longer return duplicated information
when requesting multiple stats.facet fields.
(Roman Kliewer via hossman)
Other Changes
----------------------
* SOLR-3690: Fixed binary release packages to include dependencies needed for
the solr-test-framework (hossman)
* SOLR-2857: The /update/json and /update/csv URLs were restored to aid
in the migration of existing clients. (yonik)
* SOLR-3691: SimplePostTool: Mode for crawling/posting web pages
See http://wiki.apache.org/solr/ExtractingRequestHandler for examples (janhoy)
* SOLR-3707: Upgrade Solr to Tika 1.2 (janhoy)
================== 4.0.0-BETA =================== ================== 4.0.0-BETA ===================
@ -271,7 +300,6 @@ Other Changes
Also, the configuration itself can be passed using the "dataConfig" parameter rather than Also, the configuration itself can be passed using the "dataConfig" parameter rather than
using a file (this previously worked in debug mode only). When configuration errors are using a file (this previously worked in debug mode only). When configuration errors are
encountered, the error message is returned in XML format. (James Dyer) encountered, the error message is returned in XML format. (James Dyer)
* SOLR-3439: Make SolrCell easier to use out of the box. Also improves "/browse" to display * SOLR-3439: Make SolrCell easier to use out of the box. Also improves "/browse" to display
rich-text documents correctly, along with facets for author and content_type. rich-text documents correctly, along with facets for author and content_type.
With the new "content" field, highlighting of body is supported. See also SOLR-3672 for With the new "content" field, highlighting of body is supported. See also SOLR-3672 for

View File

@ -310,12 +310,11 @@ Copyright 2004 Sun Microsystems, Inc. (Rome JAR)
Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/) Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)
Copyright (C) 1999-2007 Shigeru Chiba. All Rights Reserved.
(Javassist, MPL licensed: http://www.csg.ci.i.u-tokyo.ac.jp/~chiba/javassist/)
Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis) Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis)
Scannotation (C) Bill Burke Copyright 2012 Kohei Taketa juniversalchardet (http://code.google.com/p/juniversalchardet/)
Lasse Collin and others, XZ for Java (http://tukaani.org/xz/java.html)
========================================================================= =========================================================================
== Language Detection Notices == == Language Detection Notices ==

View File

@ -386,8 +386,9 @@
<tarfileset dir="." <tarfileset dir="."
prefix="${fullnamever}" prefix="${fullnamever}"
includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/** includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/**
client/README.txt client/ruby/solr-ruby/** contrib/**/lib/** client/README.txt client/ruby/solr-ruby/**
contrib/**/README.txt licenses/**" contrib/**/lib/** contrib/**/README.txt
licenses/**"
excludes="lib/README.committers.txt **/data/ **/logs/* excludes="lib/README.committers.txt **/data/ **/logs/*
**/classes/ **/*.sh **/ivy.xml **/build.xml **/classes/ **/*.sh **/ivy.xml **/build.xml
**/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml **/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml
@ -401,7 +402,9 @@
includes="example/**/*.sh example/**/bin/" /> includes="example/**/*.sh example/**/bin/" />
<tarfileset dir="." <tarfileset dir="."
prefix="${fullnamever}" prefix="${fullnamever}"
includes="dist/*.jar dist/*.war dist/solrj-lib/*" includes="dist/*.jar dist/*.war
dist/solrj-lib/*
dist/test-framework/**"
excludes="**/*.tgz **/*.zip **/*.md5 **/*src*.jar **/*docs*.jar **/*.sha1" /> excludes="**/*.tgz **/*.zip **/*.md5 **/*src*.jar **/*docs*.jar **/*.sha1" />
<tarfileset dir="${dest}/docs" <tarfileset dir="${dest}/docs"
prefix="${fullnamever}/docs" /> prefix="${fullnamever}/docs" />

View File

@ -193,7 +193,7 @@
<property name="lucenedocs" location="${common.dir}/build/docs"/> <property name="lucenedocs" location="${common.dir}/build/docs"/>
<!-- dependency to ensure all lucene javadocs are present --> <!-- dependency to ensure all lucene javadocs are present -->
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial"/> <target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial,javadocs-test-framework"/>
<!-- create javadocs for the current module --> <!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs"> <target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs">

View File

@ -20,36 +20,36 @@
<info organisation="org.apache.solr" module="extraction"/> <info organisation="org.apache.solr" module="extraction"/>
<dependencies> <dependencies>
<!-- Tika JARs --> <!-- Tika JARs -->
<dependency org="org.apache.tika" name="tika-core" rev="1.1" transitive="false"/> <dependency org="org.apache.tika" name="tika-core" rev="1.2" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.1" transitive="false"/> <dependency org="org.apache.tika" name="tika-parsers" rev="1.2" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.1/gettingstarted.html#Using_Tika_as_a_Maven_dependency --> <!-- Tika dependencies - see http://tika.apache.org/1.2/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<!-- When upgrading Tika, upgrade dependencies versions and add any new ones <!-- When upgrading Tika, upgrade dependencies versions and add any new ones
(except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) --> (except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) -->
<dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.1" transitive="false"/> <dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.1" transitive="false"/>
<dependency org="org.gagravarr" name="vorbis-java-core" rev="0.1" transitive="false"/> <dependency org="org.gagravarr" name="vorbis-java-core" rev="0.1" transitive="false"/>
<dependency org="edu.ucar" name="netcdf" rev="4.2-min" transitive="false"/> <dependency org="edu.ucar" name="netcdf" rev="4.2-min" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7" transitive="false"/> <dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7" transitive="false"/> <dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.3" transitive="false"/> <dependency org="org.apache.commons" name="commons-compress" rev="1.4.1" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.6.0" transitive="false"/> <dependency org="org.apache.pdfbox" name="pdfbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.6.0" transitive="false"/> <dependency org="org.apache.pdfbox" name="fontbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.6.0" transitive="false"/> <dependency org="org.apache.pdfbox" name="jempbox" rev="1.7.0" transitive="false"/>
<dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" transitive="false"/> <dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" transitive="false"/> <dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8-beta5" transitive="false"/> <dependency org="org.apache.poi" name="poi" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8-beta5" transitive="false"/> <dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta5" transitive="false"/> <dependency org="org.apache.poi" name="poi-ooxml" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8-beta5" transitive="false"/> <dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8" transitive="false"/>
<dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.3.0" transitive="false"/> <dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.3.0" transitive="false"/>
<dependency org="dom4j" name="dom4j" rev="1.6.1" transitive="false"/> <dependency org="dom4j" name="dom4j" rev="1.6.1" transitive="false"/>
<dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" transitive="false"/> <dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-beta-5" transitive="false"/> <dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-RC-1" transitive="false"/>
<dependency org="net.sf.scannotation" name="scannotation" rev="1.0.2" transitive="false"/>
<dependency org="javassist" name="javassist" rev="3.6.0.GA" transitive="false"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.4.0-beta-1" transitive="false"/> <dependency org="com.drewnoakes" name="metadata-extractor" rev="2.4.0-beta-1" transitive="false"/>
<dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" transitive="false"/> <dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" transitive="false"/>
<dependency org="rome" name="rome" rev="0.9" transitive="false"/> <dependency org="rome" name="rome" rev="0.9" transitive="false"/>
<dependency org="jdom" name="jdom" rev="1.0" transitive="false"/> <dependency org="jdom" name="jdom" rev="1.0" transitive="false"/>
<dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" transitive="false"/>
<dependency org="org.tukaani" name="xz" rev="1.0" transitive="false"/>
<!-- Other ExtractingRequestHandler dependencies --> <!-- Other ExtractingRequestHandler dependencies -->
<dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/> <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
<dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/> <dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/>

View File

@ -64,8 +64,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.producer", "extractedProducer", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate", "fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a", "uprefix", "ignored_",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Author", "extractedAuthor", "fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"literal.id", "one", "literal.id", "one",
@ -81,6 +80,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor", "fmap.Author", "extractedAuthor",
"fmap.language", "extractedLanguage", "fmap.language", "extractedLanguage",
"literal.id", "two", "literal.id", "two",
"uprefix", "ignored_",
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"fmap.Last-Modified", "extractedDate" "fmap.Last-Modified", "extractedDate"
); );
@ -136,6 +136,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Author", "extractedAuthor", "fmap.Author", "extractedAuthor",
"literal.id", "three", "literal.id", "three",
"uprefix", "ignored_",
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"fmap.language", "extractedLanguage", "fmap.language", "extractedLanguage",
"fmap.Last-Modified", "extractedDate" "fmap.Last-Modified", "extractedDate"
@ -206,6 +207,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor", "fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"literal.id", "one", "literal.id", "one",
"uprefix", "ignored_",
"fmap.language", "extractedLanguage", "fmap.language", "extractedLanguage",
"literal.extractionLiteralMV", "one", "literal.extractionLiteralMV", "one",
"literal.extractionLiteralMV", "two", "literal.extractionLiteralMV", "two",
@ -374,9 +376,8 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", loadLocal("extraction/arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate", "fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Author", "extractedAuthor", "fmap.Author", "extractedAuthor",
"uprefix", "ignored_",
"fmap.content", "wdf_nocase", "fmap.content", "wdf_nocase",
"literal.id", "one", "literal.id", "one",
"fmap.Last-Modified", "extractedDate"); "fmap.Last-Modified", "extractedDate");
@ -404,8 +405,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/password-is-solrcell.docx", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", loadLocal("extraction/password-is-solrcell.docx", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate", "fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a", "uprefix", "ignored_",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Author", "extractedAuthor", "fmap.Author", "extractedAuthor",
"fmap.content", "wdf_nocase", "fmap.content", "wdf_nocase",
"literal.id", "one", "literal.id", "one",
@ -462,8 +462,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"fmap.language", "extractedLanguage", "fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate", "fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a", "uprefix", "ignored_",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Last-Modified", "extractedDate"); "fmap.Last-Modified", "extractedDate");
// Here the literal value should override the Tika-parsed title: // Here the literal value should override the Tika-parsed title:
@ -478,8 +477,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"fmap.language", "extractedLanguage", "fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate", "fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a", "uprefix", "ignored_",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Last-Modified", "extractedDate"); "fmap.Last-Modified", "extractedDate");
// Here we mimic the old behaviour where literals are added, not overridden // Here we mimic the old behaviour where literals are added, not overridden
@ -498,8 +496,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent", "fmap.content", "extractedContent",
"fmap.language", "extractedLanguage", "fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate", "fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a", "uprefix", "ignored_",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Last-Modified", "extractedDate"); "fmap.Last-Modified", "extractedDate");
assertU(commit()); assertU(commit());

View File

@ -125,6 +125,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
} }
cacheValue.refCnt--; cacheValue.refCnt--;
if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) { if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) {
log.info("Closing directory:" + cacheValue.path);
directory.close(); directory.close();
byDirectoryCache.remove(directory); byDirectoryCache.remove(directory);
byPathCache.remove(cacheValue.path); byPathCache.remove(cacheValue.path);
@ -194,6 +195,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
byDirectoryCache.put(directory, newCacheValue); byDirectoryCache.put(directory, newCacheValue);
byPathCache.put(fullPath, newCacheValue); byPathCache.put(fullPath, newCacheValue);
log.info("return new directory for " + fullPath + " forceNew:" + forceNew);
} else { } else {
cacheValue.refCnt++; cacheValue.refCnt++;
} }

View File

@ -1554,7 +1554,7 @@ public final class SolrCore implements SolrInfoMBean {
} catch (Throwable e) { } catch (Throwable e) {
// do not allow decref() operations to fail since they are typically called in finally blocks // do not allow decref() operations to fail since they are typically called in finally blocks
// and throwing another exception would be very unexpected. // and throwing another exception would be very unexpected.
SolrException.log(log, "Error closing searcher:", e); SolrException.log(log, "Error closing searcher:" + this, e);
} }
} }
}; };

View File

@ -29,7 +29,7 @@ public class CSVRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) { public void init(NamedList args) {
super.init(args); super.init(args);
setAssumeContentType("application/csv"); setAssumeContentType("application/csv");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler"); // log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
} }
//////////////////////// SolrInfoMBeans methods ////////////////////// //////////////////////// SolrInfoMBeans methods //////////////////////

View File

@ -29,7 +29,7 @@ public class JsonUpdateRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) { public void init(NamedList args) {
super.init(args); super.init(args);
setAssumeContentType("application/json"); setAssumeContentType("application/json");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler"); // log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
} }
//////////////////////// SolrInfoMBeans methods ////////////////////// //////////////////////// SolrInfoMBeans methods //////////////////////

View File

@ -384,7 +384,7 @@ public class SnapPuller {
// may be closed // may be closed
core.getDirectoryFactory().doneWithDirectory(oldDirectory); core.getDirectoryFactory().doneWithDirectory(oldDirectory);
} }
doCommit(); doCommit(isFullCopyNeeded);
} }
replicationStartTime = 0; replicationStartTime = 0;
@ -533,11 +533,11 @@ public class SnapPuller {
return sb; return sb;
} }
private void doCommit() throws IOException { private void doCommit(boolean isFullCopyNeeded) throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams()); new ModifiableSolrParams());
// reboot the writer on the new index and get a new searcher // reboot the writer on the new index and get a new searcher
solrCore.getUpdateHandler().newIndexWriter(true); solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded);
try { try {
// first try to open an NRT searcher so that the new // first try to open an NRT searcher so that the new

View File

@ -182,8 +182,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) { for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
nl2.add(e2.getKey(), e2.getValue().getStatsValues()); nl2.add(e2.getKey(), e2.getValue().getStatsValues());
} }
res.add(FACETS, nl);
} }
res.add(FACETS, nl);
return res; return res;
} }

View File

@ -74,8 +74,7 @@ public final class DefaultSolrCoreState extends SolrCoreState {
} }
if (indexWriter == null) { if (indexWriter == null) {
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", false);
false, false);
} }
if (refCntWriter == null) { if (refCntWriter == null) {
refCntWriter = new RefCounted<IndexWriter>(indexWriter) { refCntWriter = new RefCounted<IndexWriter>(indexWriter) {
@ -113,15 +112,25 @@ public final class DefaultSolrCoreState extends SolrCoreState {
try { try {
if (indexWriter != null) { if (indexWriter != null) {
if (!rollback) {
try { try {
log.info("Closing old IndexWriter... core=" + coreName); log.info("Closing old IndexWriter... core=" + coreName);
indexWriter.close(); indexWriter.close();
} catch (Throwable t) { } catch (Throwable t) {
SolrException.log(log, "Error closing old IndexWriter. core=" + coreName, t); SolrException.log(log, "Error closing old IndexWriter. core="
+ coreName, t);
}
} else {
try {
log.info("Rollback old IndexWriter... core=" + coreName);
indexWriter.rollback();
} catch (Throwable t) {
SolrException.log(log, "Error rolling back old IndexWriter. core="
+ coreName, t);
} }
} }
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", }
false, true); indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", true);
log.info("New IndexWriter is ready to be used."); log.info("New IndexWriter is ready to be used.");
// we need to null this so it picks up the new writer next get call // we need to null this so it picks up the new writer next get call
refCntWriter = null; refCntWriter = null;
@ -174,14 +183,12 @@ public final class DefaultSolrCoreState extends SolrCoreState {
@Override @Override
public synchronized void rollbackIndexWriter(SolrCore core) throws IOException { public synchronized void rollbackIndexWriter(SolrCore core) throws IOException {
indexWriter.rollback();
newIndexWriter(core, true); newIndexWriter(core, true);
} }
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, boolean forceNewDirectory) throws IOException {
boolean removeAllExisting, boolean forceNewDirectory) throws IOException {
return new SolrIndexWriter(name, core.getNewIndexDir(), return new SolrIndexWriter(name, core.getNewIndexDir(),
core.getDirectoryFactory(), removeAllExisting, core.getSchema(), core.getDirectoryFactory(), false, core.getSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), forceNewDirectory); core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), forceNewDirectory);
} }

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import org.apache.solr.common.util.FastOutputStream;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/**
 * A {@link FastOutputStream} whose flushed bytes are collected in memory as a
 * list of byte arrays ({@link #buffers}) instead of being forwarded elsewhere.
 * The superclass is constructed with a {@code null} first argument.
 *
 * @lucene.internal
 */
public class MemOutputStream extends FastOutputStream {
  /** Every chunk handed to {@link #flush(byte[], int, int)} so far, in arrival order. */
  public List<byte[]> buffers = new LinkedList<byte[]>();

  /**
   * @param tempBuffer array passed to the superclass as its working buffer
   */
  public MemOutputStream(byte[] tempBuffer) {
    super(null, tempBuffer, 0);
  }

  /**
   * Records the given range of bytes in {@link #buffers}.
   *
   * @param arr    array holding the bytes to record
   * @param offset index of the first byte in {@code arr}
   * @param len    number of bytes to record; nothing is recorded when it is not positive
   */
  @Override
  public void flush(byte[] arr, int offset, int len) throws IOException {
    final boolean wholeWorkingBuffer = (arr == buf) && (offset == 0) && (len == buf.length);
    if (wholeWorkingBuffer) {
      // Keep the working array itself (no copy) and continue with a fresh 8192-byte one.
      buffers.add(buf);
      buf = new byte[8192];
      return;
    }
    if (len <= 0) {
      return;
    }
    // Partial or foreign array: store a private copy of just the requested range.
    final byte[] chunk = new byte[len];
    System.arraycopy(arr, offset, chunk, 0, len);
    buffers.add(chunk);
  }

  /**
   * Writes all recorded chunks, followed by the first {@code pos} bytes of the
   * current working buffer, to the given stream.
   *
   * @param fos destination stream
   */
  public void writeAll(FastOutputStream fos) throws IOException {
    for (byte[] chunk : buffers) {
      fos.write(chunk);
    }
    final int pending = pos;
    if (pending > 0) {
      fos.write(buf, 0, pending);
    }
  }
}

View File

@ -141,6 +141,8 @@ public class SolrIndexWriter extends IndexWriter {
super.rollback(); super.rollback();
} finally { } finally {
isClosed = true; isClosed = true;
directoryFactory.release(getDirectory());
numCloses.incrementAndGet();
} }
} }

View File

@ -775,31 +775,3 @@ class ChannelFastInputStream extends FastInputStream {
} }
/**
 * A {@link FastOutputStream} whose {@link #flush(byte[], int, int)} stores the
 * flushed bytes in {@link #buffers} as a list of arrays.
 */
class MemOutputStream extends FastOutputStream {
  /** Arrays collected by {@link #flush(byte[], int, int)}, oldest first. */
  public List<byte[]> buffers = new LinkedList<byte[]>();

  /**
   * @param tempBuffer array passed through to the superclass constructor
   *                   (presumably the initial working buffer; confirm against FastOutputStream)
   */
  public MemOutputStream(byte[] tempBuffer) {
    super(null, tempBuffer, 0);
  }

  /**
   * Appends the bytes {@code arr[offset .. offset+len)} to {@link #buffers}.
   * A non-positive {@code len} records nothing.
   */
  @Override
  public void flush(byte[] arr, int offset, int len) throws IOException {
    if (arr == buf && offset == 0 && len == buf.length) {
      // The entire internal buffer is being flushed: take ownership of it
      // and install a replacement so later writes do not touch stored data.
      final byte[] full = buf;
      buf = new byte[8192];
      buffers.add(full);
    } else {
      if (len > 0) {
        final byte[] copy = new byte[len];
        System.arraycopy(arr, offset, copy, 0, len);
        buffers.add(copy);
      }
    }
  }

  /**
   * Replays the collected arrays into {@code fos}, then the first {@code pos}
   * bytes of the current buffer if {@code pos} is greater than zero.
   */
  public void writeAll(FastOutputStream fos) throws IOException {
    for (byte[] stored : buffers) {
      fos.write(stored);
    }
    if (pos <= 0) {
      return;
    }
    fos.write(buf, 0, pos);
  }
}

View File

@ -28,7 +28,7 @@ public class FastWriter extends Writer {
// it won't cause double buffering. // it won't cause double buffering.
private static final int BUFSIZE = 8192; private static final int BUFSIZE = 8192;
protected final Writer sink; protected final Writer sink;
protected final char[] buf; protected char[] buf;
protected int pos; protected int pos;
public FastWriter(Writer w) { public FastWriter(Writer w) {
@ -69,42 +69,64 @@ public class FastWriter extends Writer {
} }
@Override @Override
public void write(char cbuf[], int off, int len) throws IOException { public void write(char arr[], int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos; int space = buf.length - pos;
if (len < space) {
System.arraycopy(cbuf, off, buf, pos, len); if (len <= space) {
System.arraycopy(arr, off, buf, pos, len);
pos += len; pos += len;
} else if (len<BUFSIZE) { return;
// if the data to write is small enough, buffer it. } else if (len > buf.length) {
System.arraycopy(cbuf, off, buf, pos, space); if (pos>0) {
flush(buf, 0, buf.length);
pos = len-space;
System.arraycopy(cbuf, off+space, buf, 0, pos);
} else {
flush(buf,0,pos); // flush flush(buf,0,pos); // flush
pos=0; pos=0;
}
// don't buffer, just write to sink // don't buffer, just write to sink
flush(cbuf, off, len); flush(arr, off, len);
return;
}
// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.
System.arraycopy(arr, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
} }
} }
@Override @Override
public void write(String str, int off, int len) throws IOException { public void write(String str, int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos; int space = buf.length - pos;
if (len < space) {
if (len <= space) {
str.getChars(off, off+len, buf, pos); str.getChars(off, off+len, buf, pos);
pos += len; pos += len;
} else if (len<BUFSIZE) { return;
// if the data to write is small enough, buffer it. } else if (len > buf.length) {
str.getChars(off, off+space, buf, pos); if (pos>0) {
flush(buf, 0, buf.length);
str.getChars(off+space, off+len, buf, 0);
pos = len-space;
} else {
flush(buf,0,pos); // flush flush(buf,0,pos); // flush
pos=0; pos=0;
}
// don't buffer, just write to sink // don't buffer, just write to sink
flush(str, off, len); flush(str, off, len);
return;
}
// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.
str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,49 @@
<html>
<head>
<title>Welcome to Solr</title>
</head>
<body>
<p>
Here is some text
</p>
<div>Here is some text in a div</div>
<div>This has a <a href="http://www.apache.org">link</a>.</div>
<a href="#news">News</a>
<ul class="minitoc">
<li>
<a href="#03+October+2008+-+Solr+Logo+Contest">03 October 2008 - Solr Logo Contest</a>
</li>
<li>
<a href="#15+September+2008+-+Solr+1.3.0+Available">15 September 2008 - Solr 1.3.0 Available</a>
</li>
<li>
<a href="#28+August+2008+-+Lucene%2FSolr+at+ApacheCon+New+Orleans">28 August 2008 - Lucene/Solr at ApacheCon New Orleans</a>
</li>
<li>
<a href="#03+September+2007+-+Lucene+at+ApacheCon+Atlanta">03 September 2007 - Lucene at ApacheCon Atlanta</a>
</li>
<li>
<a href="#06+June+2007%3A+Release+1.2+available">06 June 2007: Release 1.2 available</a>
</li>
<li>
<a href="#17+January+2007%3A+Solr+graduates+from+Incubator">17 January 2007: Solr graduates from Incubator</a>
</li>
<li>
<a href="#22+December+2006%3A+Release+1.1.0+available">22 December 2006: Release 1.1.0 available</a>
</li>
<li>
<a href="#15+August+2006%3A+Solr+at+ApacheCon+US">15 August 2006: Solr at ApacheCon US</a>
</li>
<li>
<a href="#21+April+2006%3A+Solr+at+ApacheCon">21 April 2006: Solr at ApacheCon</a>
</li>
<li>
<a href="#21+February+2006%3A+nightly+builds">21 February 2006: nightly builds</a>
</li>
<li>
<a href="#17+January+2006%3A+Solr+Joins+Apache+Incubator">17 January 2006: Solr Joins Apache Incubator</a>
</li>
</ul>
</body>
</html>

View File

@ -0,0 +1,3 @@
Example text document
This is a simple example for a plain text document, indexed to Solr

View File

@ -54,7 +54,7 @@
--> -->
<maxBufferedDocs>10</maxBufferedDocs> <maxBufferedDocs>10</maxBufferedDocs>
<mergePolicy class="org.apache.lucene.index.LogDocMergePolicy"/> <mergePolicy class="org.apache.lucene.index.LogDocMergePolicy"/>
<lockType>single</lockType> <lockType>native</lockType>
<unlockOnStartup>true</unlockOnStartup> <unlockOnStartup>true</unlockOnStartup>
</indexConfig> </indexConfig>

View File

@ -228,32 +228,35 @@ public class StatsComponentTest extends AbstractSolrTestCase {
} }
public void doTestFacetStatisticsResult(String f) throws Exception { public void doTestFacetStatisticsResult(String f) throws Exception {
assertU(adoc("id", "1", f, "10", "active_s", "true")); assertU(adoc("id", "1", f, "10", "active_s", "true", "other_s", "foo"));
assertU(adoc("id", "2", f, "20", "active_s", "true")); assertU(adoc("id", "2", f, "20", "active_s", "true", "other_s", "bar"));
assertU(adoc("id", "3", f, "30", "active_s", "false")); assertU(adoc("id", "3", f, "30", "active_s", "false", "other_s", "foo"));
assertU(adoc("id", "4", f, "40", "active_s", "false")); assertU(adoc("id", "4", f, "40", "active_s", "false", "other_s", "foo"));
assertU(commit()); assertU(commit());
assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","indent","true") final String pre = "//lst[@name='stats_fields']/lst[@name='"+f+"']/lst[@name='facets']/lst[@name='active_s']";
, "//lst[@name='true']/double[@name='min'][.='10.0']"
, "//lst[@name='true']/double[@name='max'][.='20.0']" assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","stats.facet","other_s","indent","true")
, "//lst[@name='true']/double[@name='sum'][.='30.0']" , "*[count("+pre+")=1]"
, "//lst[@name='true']/long[@name='count'][.='2']" , pre+"/lst[@name='true']/double[@name='min'][.='10.0']"
, "//lst[@name='true']/long[@name='missing'][.='0']" , pre+"/lst[@name='true']/double[@name='max'][.='20.0']"
, "//lst[@name='true']/double[@name='sumOfSquares'][.='500.0']" , pre+"/lst[@name='true']/double[@name='sum'][.='30.0']"
, "//lst[@name='true']/double[@name='mean'][.='15.0']" , pre+"/lst[@name='true']/long[@name='count'][.='2']"
, "//lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']" , pre+"/lst[@name='true']/long[@name='missing'][.='0']"
, pre+"/lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, pre+"/lst[@name='true']/double[@name='mean'][.='15.0']"
, pre+"/lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
); );
assertQ("test value for active_s=false", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s") assertQ("test value for active_s=false", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s")
, "//lst[@name='false']/double[@name='min'][.='30.0']" , pre+"/lst[@name='false']/double[@name='min'][.='30.0']"
, "//lst[@name='false']/double[@name='max'][.='40.0']" , pre+"/lst[@name='false']/double[@name='max'][.='40.0']"
, "//lst[@name='false']/double[@name='sum'][.='70.0']" , pre+"/lst[@name='false']/double[@name='sum'][.='70.0']"
, "//lst[@name='false']/long[@name='count'][.='2']" , pre+"/lst[@name='false']/long[@name='count'][.='2']"
, "//lst[@name='false']/long[@name='missing'][.='0']" , pre+"/lst[@name='false']/long[@name='missing'][.='0']"
, "//lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']" , pre+"/lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, "//lst[@name='false']/double[@name='mean'][.='35.0']" , pre+"/lst[@name='false']/double[@name='mean'][.='35.0']"
, "//lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']" , pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
); );
} }

View File

@ -0,0 +1,237 @@
package org.apache.solr.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.util.SimplePostTool.PageFetcher;
import org.apache.solr.util.SimplePostTool.PageFetcherResult;
import org.junit.Before;
import org.junit.Test;
public class SimplePostToolTest extends SolrTestCaseJ4 {
SimplePostTool t_file, t_file_auto, t_file_rec, t_web, t_test;
PageFetcher pf;
@Before
public void setUp() throws Exception {
super.setUp();
String[] args = {"-"};
System.setProperty("data", "files");
t_file = SimplePostTool.parseArgsAndInit(args);
System.setProperty("auto", "yes");
t_file_auto = SimplePostTool.parseArgsAndInit(args);
System.setProperty("recursive", "yes");
t_file_rec = SimplePostTool.parseArgsAndInit(args);
System.setProperty("data", "web");
t_web = SimplePostTool.parseArgsAndInit(args);
System.setProperty("params", "param1=foo&param2=bar");
t_test = SimplePostTool.parseArgsAndInit(args);
pf = new MockPageFetcher();
SimplePostTool.pageFetcher = pf;
SimplePostTool.mockMode = true;
}
@Test
public void testParseArgsAndInit() {
assertEquals(false, t_file.auto);
assertEquals(true, t_file_auto.auto);
assertEquals(0, t_file_auto.recursive);
assertEquals(999, t_file_rec.recursive);
assertEquals(true, t_file.commit);
assertEquals(false, t_file.optimize);
assertEquals(null, t_file.out);
assertEquals(1, t_web.recursive);
assertEquals(10, t_web.delay);
assertNotNull(t_test.solrUrl);
}
@Test
public void testNormalizeUrlEnding() {
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/"));
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/#foo?bar=baz"));
assertEquals("http://example.com/index.html", SimplePostTool.normalizeUrlEnding("http://example.com/index.html#hello"));
}
@Test
public void testComputeFullUrl() throws MalformedURLException {
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/"), "/index.html"));
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/foo/bar/"), "/index.html"));
assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo.htm?baz#hello"), "fil.html"));
// TODO: How to know what is the base if URL path ends with "foo"??
// assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo?baz#hello"), "fil.html"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "fil.jpg"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "mailto:hello@foo.bar"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "ftp://server/file"));
}
@Test
public void testTypeSupported() {
assertTrue(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("text/xml"));
assertFalse(t_web.typeSupported("text/foo"));
t_web.fileTypes = "doc,xls,ppt";
t_web.globFileFilter = t_web.getFileFilterFromFileTypes(t_web.fileTypes);
assertFalse(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("application/msword"));
}
@Test
public void testIsOn() {
assertTrue(SimplePostTool.isOn("true"));
assertTrue(SimplePostTool.isOn("1"));
assertFalse(SimplePostTool.isOn("off"));
}
@Test
public void testAppendParam() {
assertEquals("http://example.com?foo=bar", SimplePostTool.appendParam("http://example.com", "foo=bar"));
assertEquals("http://example.com/?a=b&foo=bar", SimplePostTool.appendParam("http://example.com/?a=b", "foo=bar"));
}
@Test
public void testAppendUrlPath() throws MalformedURLException {
assertEquals(new URL("http://example.com/a?foo=bar"), SimplePostTool.appendUrlPath(new URL("http://example.com?foo=bar"), "/a"));
}
@Test
public void testGuessType() {
File f = new File("foo.doc");
assertEquals("application/msword", SimplePostTool.guessType(f));
f = new File("foobar");
assertEquals(null, SimplePostTool.guessType(f));
}
@Test
public void testDoFilesMode() {
t_file_auto.recursive = 0;
File dir = getFile("exampledocs");
int num = t_file_auto.postFiles(new File[] {dir}, 0, null, null);
assertEquals(2, num);
}
@Test
public void testDoWebMode() {
// Uses mock pageFetcher
t_web.delay = 0;
t_web.recursive = 5;
int num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(5, num);
t_web.recursive = 1;
num = t_web.postWebPages(new String[] {"http://example.com/"}, 0, null);
assertEquals(3, num);
// Without respecting robots.txt
SimplePostTool.pageFetcher.robotsCache.clear();
t_web.recursive = 5;
num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(6, num);
}
@Test
public void testRobotsExclusion() throws MalformedURLException {
assertFalse(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/")));
assertTrue(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/disallowed")));
assertTrue("There should be two entries parsed from robots.txt", SimplePostTool.pageFetcher.robotsCache.get("example.com").size() == 2);
}
class MockPageFetcher extends PageFetcher {
HashMap<String,String> htmlMap = new HashMap<String,String>();
HashMap<String,Set<URL>> linkMap = new HashMap<String,Set<URL>>();
public MockPageFetcher() throws IOException {
(new SimplePostTool()).super();
htmlMap.put("http://example.com", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/index.html", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/page1", "<html><body><a href=\"http://example.com/page1/foo\"></body></html>");
htmlMap.put("http://example.com/page1/foo", "<html><body><a href=\"http://example.com/page1/foo/bar\"></body></html>");
htmlMap.put("http://example.com/page1/foo/bar", "<html><body><a href=\"http://example.com/page1\"></body></html>");
htmlMap.put("http://example.com/page2", "<html><body><a href=\"http://example.com/\"><a href=\"http://example.com/disallowed\"/></body></html>");
htmlMap.put("http://example.com/disallowed", "<html><body><a href=\"http://example.com/\"></body></html>");
Set<URL> s = new HashSet<URL>();
s.add(new URL("http://example.com/page1"));
s.add(new URL("http://example.com/page2"));
linkMap.put("http://example.com", s);
linkMap.put("http://example.com/index.html", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo"));
linkMap.put("http://example.com/page1", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo/bar"));
linkMap.put("http://example.com/page1/foo", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/disallowed"));
linkMap.put("http://example.com/page2", s);
// Simulate a robots.txt file with comments and a few disallows
StringBuilder sb = new StringBuilder();
sb.append("# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n");
sb.append("User-agent: * # match all bots\n");
sb.append("Disallow: # This is void\n");
sb.append("Disallow: /disallow # Disallow this path\n");
sb.append("Disallow: /nonexistingpath # Disallow this path\n");
this.robotsCache.put("example.com", SimplePostTool.pageFetcher.
parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes("UTF-8"))));
}
@Override
public PageFetcherResult readPageFromUrl(URL u) {
PageFetcherResult res = (new SimplePostTool()).new PageFetcherResult();
if (isDisallowedByRobots(u)) {
res.httpStatus = 403;
return res;
}
res.httpStatus = 200;
res.contentType = "text/html";
try {
res.content = htmlMap.get(u.toString()).getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException();
}
return res;
}
@Override
public Set<URL> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) {
Set<URL> s = linkMap.get(SimplePostTool.normalizeUrlEnding(u.toString()));
if(s == null)
s = new HashSet<URL>();
return s;
}
}
}

Some files were not shown because too many files have changed in this diff Show More