diff --git a/dev-tools/eclipse/dot.classpath b/dev-tools/eclipse/dot.classpath index c24f5f0b416..1d2abc15758 100644 --- a/dev-tools/eclipse/dot.classpath +++ b/dev-tools/eclipse/dot.classpath @@ -1,126 +1,126 @@ - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -133,47 +133,57 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 45d5be021ec..2d8d20e329e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -75,6 +75,14 @@ Bug Fixes encoders / stemmers via the ResourceLoader now instead of Class.forName(). Solr users should now no longer have to embed these in its war. (David Smiley) +* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly. + Also, ensure immutability and use only one instance of this table in RAM (lazy + loaded) since its quite large. (sausarkar, Steven Rowe, Robert Muir) + +* LUCENE-4310: MappingCharFilter was failing to match input strings + containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir, + Mike McCandless) + Build * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. 
Added support for diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java index c6470611d2c..c22203a76a4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java @@ -111,9 +111,8 @@ public class NormalizeCharMap { final org.apache.lucene.util.fst.Builder builder = new org.apache.lucene.util.fst.Builder(FST.INPUT_TYPE.BYTE2, outputs); final IntsRef scratch = new IntsRef(); for(Map.Entry ent : pendingPairs.entrySet()) { - builder.add(Util.toUTF32(ent.getKey(), scratch), + builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); - } map = builder.finish(); pendingPairs.clear(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java index c4fab5519c0..358ab2d7b74 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; public class TestMappingCharFilter extends BaseTokenStreamTestCase { @@ -55,6 +56,11 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase { builder.add( "empty", "" ); + // non-BMP (surrogate pair): + builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef"); + + builder.add("\uff01", "full-width-exclamation"); + normMap = 
builder.build(); } @@ -128,6 +134,18 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase { assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5); } + public void testNonBMPChar() throws Exception { + CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2); + } + + public void testFullWidthChar() throws Exception { + CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"full-width-exclamation"}, new int[]{0}, new int[]{1}, 1); + } + // // 1111111111222 // 01234567890123456789012 diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java index 32d0665c5ed..f1a8364c0de 100644 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java @@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase { return DefaultsHolder.DEFAULT_STOP_SET; } + /** + * Returns an unmodifiable instance of the default stemmer table. 
+ */ + public static Trie getDefaultTable() { + return DefaultsHolder.DEFAULT_TABLE; + } + /** * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class * accesses the static final set the first time.; diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java index 9dce52feb3e..64abe3c84c9 100644 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java @@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.pl.PolishAnalyzer; import org.apache.lucene.analysis.stempel.StempelFilter; import org.apache.lucene.analysis.stempel.StempelStemmer; -import org.apache.lucene.analysis.util.ResourceLoader; -import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.TokenFilterFactory; -import org.egothor.stemmer.Trie; /** * Factory for {@link StempelFilter} using a Polish stemming table. 
*/ -public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { - private Trie stemmer = null; - private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl"; - +public class StempelPolishStemFilterFactory extends TokenFilterFactory { public TokenStream create(TokenStream input) { - return new StempelFilter(input, new StempelStemmer(stemmer)); - } - - public void inform(ResourceLoader loader) throws IOException { - stemmer = StempelStemmer.load(loader.openResource(STEMTABLE)); + return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable())); } } diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java index b32011fcf4f..20ea467efad 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java @@ -332,7 +332,7 @@ public class Trie { * @param key the key * @param cmd the patch command */ - public void add(CharSequence key, CharSequence cmd) { + void add(CharSequence key, CharSequence cmd) { if (key == null || cmd == null) { return; } diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java index 8c57d4ab1cc..e633c72b470 100644 --- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java +++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java @@ -22,7 +22,6 @@ import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.util.ClasspathResourceLoader; /** * Tests 
for {@link StempelPolishStemFilterFactory} @@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase public void testBasics() throws Exception { StringReader document = new StringReader("studenta studenci"); StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(); - factory.inform(new ClasspathResourceLoader(getClass())); TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document)); assertTokenStreamContents(ts, new String[] { "student", "student" }); diff --git a/lucene/build.xml b/lucene/build.xml index 82a65544788..784faf73889 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -234,10 +234,10 @@ - + - + @@ -247,7 +247,7 @@ - + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java index 15c77beacc3..131b33973c6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java @@ -26,11 +26,15 @@ import org.apache.lucene.index.TermState; * terms dict. 
*/ public class BlockTermState extends OrdTermState { - public int docFreq; // how many docs have this term - public long totalTermFreq; // total number of occurrences of this term + /** how many docs have this term */ + public int docFreq; + /** total number of occurrences of this term */ + public long totalTermFreq; - public int termBlockOrd; // the term's ord in the current block - public long blockFilePointer; // fp into the terms dict primary file (_X.tim) that holds this term + /** the term's ord in the current block */ + public int termBlockOrd; + /** fp into the terms dict primary file (_X.tim) that holds this term */ + public long blockFilePointer; @Override public void copyFrom(TermState _other) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java index 73274b29849..0c25648875c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java @@ -36,7 +36,7 @@ import org.apache.lucene.util.MathUtil; */ public abstract class MultiLevelSkipListReader { - // the maximum number of skip levels possible for this index + /** the maximum number of skip levels possible for this index */ protected int maxNumberOfSkipLevels; // number of levels in this skip list diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java index 0c80d702c78..ba5be04d332 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java @@ -52,7 +52,7 @@ import org.apache.lucene.util.MathUtil; */ public abstract class MultiLevelSkipListWriter { - // number of levels in this skip list + /** number of levels in this skip list */ protected int numberOfSkipLevels; 
// the skip interval in the list with level = 0 @@ -93,8 +93,8 @@ public abstract class MultiLevelSkipListWriter { } } + /** creates new buffers or empties the existing ones */ protected void resetSkip() { - // creates new buffers or empties the existing ones if (skipBuffer == null) { init(); } else { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index 03ebc6caf44..b8dc734d197 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -1796,7 +1796,7 @@ public class DirectPostingsFormat extends PostingsFormat { } // Docs + freqs: - public final static class HighFreqDocsEnum extends DocsEnum { + private final static class HighFreqDocsEnum extends DocsEnum { private int[] docIDs; private int[] freqs; private final Bits liveDocs; @@ -1969,7 +1969,7 @@ public class DirectPostingsFormat extends PostingsFormat { } // TODO: specialize offsets and not - public final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum { + private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum { private int[] docIDs; private int[] freqs; private int[][] positions; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java index d75b9bfb595..4b01d80d052 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java @@ -36,7 +36,7 @@ public abstract class IntIndexInput implements Closeable { public abstract Index index() throws IOException; - // TODO: -- can we simplify this? + /** Records a single skip-point in the {@link IntIndexInput.Reader}. 
*/ public abstract static class Index { public abstract void read(DataInput indexIn, boolean absolute) throws IOException; @@ -50,6 +50,7 @@ public abstract class IntIndexInput implements Closeable { public abstract Index clone(); } + /** Reads int values. */ public abstract static class Reader { /** Reads next single int */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java index 14723d2574b..fd1eb49b276 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java @@ -38,6 +38,7 @@ public abstract class IntIndexOutput implements Closeable { * >= 0. */ public abstract void write(int v) throws IOException; + /** Records a single skip-point in the IndexOutput. */ public abstract static class Index { /** Internally records the current location */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java index 091d1a72d61..eace0335a8e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java @@ -22,8 +22,15 @@ import org.apache.lucene.store.IOContext; import java.io.IOException; -/** @lucene.experimental */ +/** Provides int reader and writer to specified files. + * + * @lucene.experimental */ public abstract class IntStreamFactory { + /** Create an {@link IntIndexInput} on the provided + * fileName. */ public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException; + + /** Create an {@link IntIndexOutput} on the provided + * fileName. 
*/ public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java index 500dc3eb8f5..93ac2bb4c81 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java @@ -119,10 +119,13 @@ public class DocTermOrds { protected final String field; protected int numTermsInField; - protected long termInstances; // total number of references to term numbers + /** total number of references to term numbers */ + protected long termInstances; private long memsz; - protected int total_time; // total time to uninvert the field - protected int phase1_time; // time for phase1 of the uninvert process + /** total time to uninvert the field */ + protected int total_time; + /** time for phase1 of the uninvert process */ + protected int phase1_time; protected int[] index; protected byte[][] tnums = new byte[256][]; @@ -234,7 +237,7 @@ public class DocTermOrds { protected void setActualDocFreq(int termNum, int df) throws IOException { } - // Call this only once (if you subclass!) + /** Call this only once (if you subclass!) 
*/ protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException { //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); final long startTime = System.currentTimeMillis(); diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java index 3dc1b0fc8c5..4a603ca1740 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java @@ -267,11 +267,11 @@ public class FieldInfos implements Iterable { return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType); } - // NOTE: this method does not carry over termVector - // booleans nor docValuesType; the indexer chain - // (TermVectorsConsumerPerField, DocFieldProcessor) must - // set these fields when they succeed in consuming - // the document: + /** NOTE: this method does not carry over termVector + * booleans nor docValuesType; the indexer chain + * (TermVectorsConsumerPerField, DocFieldProcessor) must + * set these fields when they succeed in consuming + * the document */ public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) { // TODO: really, indexer shouldn't even call this // method (it's only called from DocFieldProcessor); diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index be5420c103a..dc0263eb4ae 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -243,6 +243,10 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable { } } + /** Thrown when a merge was explicitly aborted because + * {@link IndexWriter#close(boolean)} was called with + * false. 
Normally this exception is + * privately caught and suppressed by {@link IndexWriter}. */ public static class MergeAbortedException extends IOException { public MergeAbortedException() { super("merge is aborted"); diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java index fbfe063a5a7..a14da90899f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java @@ -29,6 +29,9 @@ import org.apache.lucene.util.packed.PackedInts; * @lucene.experimental */ public class MergeState { + /** + * Remaps docids around deletes during merge + */ public static abstract class DocMap { private final Bits liveDocs; @@ -197,6 +200,9 @@ public class MergeState { public SegmentReader[] matchingSegmentReaders; public int matchedCount; + /** + * Class for recording units of work when merging segments. + */ public static class CheckAbort { private double workCount; private final MergePolicy.OneMerge merge; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index abfc979c0dd..a079cd07d0c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -43,7 +43,7 @@ import org.apache.lucene.util.packed.PackedInts.Reader; * @lucene.experimental * @lucene.internal */ -public class MultiDocValues extends DocValues { +class MultiDocValues extends DocValues { private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller(); private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() { diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java index 062890a844d..9bf4559b37e 100644 --- 
a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java @@ -143,6 +143,8 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum { } // TODO: implement bulk read more efficiently than super + /** Holds a {@link DocsAndPositionsEnum} along with the + * corresponding {@link ReaderSlice}. */ public final static class EnumWithSlice { public DocsAndPositionsEnum docsAndPositionsEnum; public ReaderSlice slice; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java index 2d0fd252d06..af58ac427b8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java @@ -122,6 +122,8 @@ public final class MultiDocsEnum extends DocsEnum { } // TODO: implement bulk read more efficiently than super + /** Holds a {@link DocsEnum} along with the + * corresponding {@link ReaderSlice}. 
*/ public final static class EnumWithSlice { public DocsEnum docsEnum; public ReaderSlice slice; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index b92d31a1b65..f1b938e3dab 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -133,7 +133,8 @@ public final class SegmentInfos implements Cloneable, Iterable userData = Collections.emptyMap(); // Opaque Map that user can specify during IndexWriter.commit + /** Opaque Map<String, String> that user can specify during IndexWriter.commit */ + public Map userData = Collections.emptyMap(); private List segments = new ArrayList(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java index 71b03463968..79be20cc9b6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java @@ -30,11 +30,11 @@ public class SegmentReadState { public final FieldInfos fieldInfos; public final IOContext context; - // NOTE: if this is < 0, that means "defer terms index - // load until needed". But if the codec must load the - // terms index on init (preflex is the only once currently - // that must do so), then it should negate this value to - // get the app's terms divisor: + /** NOTE: if this is < 0, that means "defer terms index + * load until needed". 
But if the codec must load the + * terms index on init (preflex is the only one currently + * that must do so), then it should negate this value to + * get the app's terms divisor */ public int termsIndexDivisor; public final String segmentSuffix; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java index 6b161df91d6..0c3dd5cf2eb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -33,11 +33,11 @@ public class SegmentWriteState { public final FieldInfos fieldInfos; public int delCountOnFlush; - // Deletes to apply while we are flushing the segment. A - // Term is enrolled in here if it was deleted at one - // point, and it's mapped to the docIDUpto, meaning any - // docID < docIDUpto containing this term should be - // deleted. + /** Deletes to apply while we are flushing the segment. A + * Term is enrolled in here if it was deleted at one + * point, and it's mapped to the docIDUpto, meaning any + * docID < docIDUpto containing this term should be + * deleted. */ public final BufferedDeletes segDeletes; // Lazily created: diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java b/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java index 023af1343f2..c4b4998bf4b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java @@ -32,6 +32,9 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.packed.PackedInts; /** + * Utility class for merging SortedBytes DocValues + * instances. 
+ * * @lucene.internal */ public final class SortedBytesMergeUtils { @@ -54,7 +57,14 @@ public final class SortedBytesMergeUtils { } return new MergeContext(comp, mergeDocCount, size, type); } - + /** + * Encapsulates contextual information about the merge. + * This class holds document id to ordinal mappings, offsets for + * variable length values and the comparator to sort the merged + * bytes. + * + * @lucene.internal + */ public static final class MergeContext { private final Comparator comp; private final BytesRef missingValue = new BytesRef(); @@ -169,10 +179,36 @@ public final class SortedBytesMergeUtils { return merger.currentOrd; } + /** + * Implementation of this interface consume the merged bytes with their + * corresponding ordinal and byte offset. The offset is the byte offset in + * target sorted source where the currently merged {@link BytesRef} instance + * should be stored at. + */ public static interface BytesRefConsumer { + + /** + * Consumes a single {@link BytesRef}. The provided {@link BytesRef} + * instances are strictly increasing with respect to the used + * {@link Comparator} used for merging + * + * @param ref + * the {@link BytesRef} to consume + * @param ord + * the ordinal of the given {@link BytesRef} in the merge target + * @param offset + * the byte offset of the given {@link BytesRef} in the merge + * target + * @throws IOException + * if an {@link IOException} occurs + */ public void consume(BytesRef ref, int ord, long offset) throws IOException; } + /** + * A simple {@link BytesRefConsumer} that writes the merged {@link BytesRef} + * instances sequentially to an {@link IndexOutput}. 
+ */ public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer { private final IndexOutput datOut; @@ -186,7 +222,15 @@ public final class SortedBytesMergeUtils { currentMergedBytes.length); } } - + + /** + * {@link RecordMerger} merges a list of {@link SortedSourceSlice} lazily by + * consuming the sorted source records one by one and de-duplicates records + * that are shared across slices. The algorithm is based on a lazy priority queue + * that prevents reading merge sources into heap memory. + * + * @lucene.internal + */ private static final class RecordMerger { private final MergeQueue queue; private final SortedSourceSlice[] top; @@ -231,6 +275,12 @@ public final class SortedBytesMergeUtils { } } + /** + * {@link SortedSourceSlice} represents a single {@link SortedSource} merge candidate. + * It encapsulates ordinal and pre-calculated target doc id to ordinal mappings. + * This class also holds state private to the merge process. + * @lucene.internal + */ public static class SortedSourceSlice { final SortedSource source; final int readerIdx; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index 590d784d77f..ff5994016cb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -45,6 +45,9 @@ import org.apache.lucene.util.packed.PackedInts; */ public interface FieldCache { + /** + * Placeholder indicating creation of this cache is currently in-progress. 
+ */ public static final class CreationPlaceholder { Object value; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index 83b44bf9b56..9d1fbafd7bf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -194,6 +194,9 @@ public abstract class FieldComparator { * than the provided value. */ public abstract int compareDocToValue(int doc, T value) throws IOException; + /** + * Base FieldComparator class for numeric types + */ public static abstract class NumericComparator extends FieldComparator { protected final T missingValue; protected final String field; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java index 5a4d44425f8..97a9974c6f1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java @@ -33,6 +33,10 @@ import org.apache.lucene.util.PriorityQueue; */ public abstract class FieldValueHitQueue extends PriorityQueue { + /** + * Extension of ScoreDoc to also store the + * {@link FieldComparator} slot. 
+ */ public static class Entry extends ScoreDoc { public int slot; diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 05e9b34f792..304b4a36acb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -398,12 +398,17 @@ public class FuzzyTermsEnum extends TermsEnum { return scale_factor; } - /** @lucene.internal */ + /** + * reuses compiled automata across different segments, + * because they are independent of the index + * @lucene.internal */ public static interface LevenshteinAutomataAttribute extends Attribute { public List automata(); } - /** @lucene.internal */ + /** + * Stores compiled automata as a list (indexed by edit distance) + * @lucene.internal */ public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute { private final List automata = new ArrayList(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 389bf3f9abf..68ffa73108d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -82,7 +82,7 @@ public class IndexSearcher { // in the next release protected final IndexReaderContext readerContext; protected final List leafContexts; - // used with executor - each slice holds a set of leafs executed within one thread + /** used with executor - each slice holds a set of leafs executed within one thread */ protected final LeafSlice[] leafSlices; // These are only used for multi-threaded search diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java index 53b5dc27f75..662e00f3542 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java @@ -32,7 +32,11 @@ import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; -/** @lucene.internal Only public to be accessible by spans package. */ +/** + * Base rewrite method that translates each term into a query, and keeps + * the scores as computed by the query. + *

+ * @lucene.internal Only public to be accessible by spans package. */ public abstract class ScoringRewrite extends TermCollectingRewrite { /** A rewrite method that first translates each term into diff --git a/lucene/core/src/java/org/apache/lucene/search/SortField.java b/lucene/core/src/java/org/apache/lucene/search/SortField.java index 35e07c4ab5b..b0e1b0db1f7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/core/src/java/org/apache/lucene/search/SortField.java @@ -38,6 +38,9 @@ import org.apache.lucene.util.StringHelper; */ public class SortField { + /** + * Specifies the type of the terms to be sorted, or special types such as CUSTOM + */ public static enum Type { /** Sort by document score (relevance). Sort values are Float and higher diff --git a/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java b/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java index 1f8c472b40e..1694789d492 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java @@ -225,6 +225,8 @@ public class TimeLimitingCollector extends Collector { } /** + * Thread used to timeout search requests. + * Can be stopped completely with {@link TimerThread#stopTimer()} * @lucene.experimental */ public static final class TimerThread extends Thread { diff --git a/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java index afbae972645..5e7dd50406f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java @@ -33,8 +33,8 @@ import org.apache.lucene.util.PriorityQueue; */ public abstract class TopDocsCollector extends Collector { - // This is used in case topDocs() is called with illegal parameters, or there - // simply aren't (enough) results. 
+ /** This is used in case topDocs() is called with illegal parameters, or there + * simply aren't (enough) results. */ protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN); /** diff --git a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java index adf3f7099ae..d5151718430 100644 --- a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java @@ -436,6 +436,9 @@ public abstract class FSDirectory extends Directory { return chunkSize; } + /** + * Writes output with {@link RandomAccessFile#write(byte[], int, int)} + */ protected static class FSIndexOutput extends BufferedIndexOutput { private final FSDirectory parent; private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java index 5f483fc6ba6..5098542a375 100644 --- a/lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java @@ -106,6 +106,9 @@ public class NIOFSDirectory extends FSDirectory { }; } + /** + * Reads bytes with {@link FileChannel#read(ByteBuffer, long)} + */ protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput { private ByteBuffer byteBuf; // wraps the buffer for NIO diff --git a/lucene/core/src/java/org/apache/lucene/store/RAMFile.java b/lucene/core/src/java/org/apache/lucene/store/RAMFile.java index e34610779f1..b89d308f41a 100644 --- a/lucene/core/src/java/org/apache/lucene/store/RAMFile.java +++ b/lucene/core/src/java/org/apache/lucene/store/RAMFile.java @@ -19,7 +19,9 @@ package org.apache.lucene.store; import java.util.ArrayList; -/** @lucene.internal */ +/** + * Represents a file in RAM as a list of byte[] buffers. 
+ * @lucene.internal */ public class RAMFile { protected ArrayList buffers = new ArrayList(); long length; diff --git a/lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java index e2deb92892a..e74e642ece9 100644 --- a/lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java @@ -85,8 +85,16 @@ public class SimpleFSDirectory extends FSDirectory { }; } + /** + * Reads bytes with {@link RandomAccessFile#seek(long)} followed by + * {@link RandomAccessFile#read(byte[], int, int)}. + */ protected static class SimpleFSIndexInput extends BufferedIndexInput { + /** + * Extension of RandomAccessFile that tracks if the file is + * open. + */ protected static class Descriptor extends RandomAccessFile { // remember if the file is open, so that we don't try to close it // more than once diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 3378a3e2141..9a863ffdd5b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -117,10 +117,13 @@ public final class ByteBlockPool { public byte[][] buffers = new byte[10][]; int bufferUpto = -1; // Which buffer we are upto - public int byteUpto = BYTE_BLOCK_SIZE; // Where we are in head buffer + /** Where we are in head buffer */ + public int byteUpto = BYTE_BLOCK_SIZE; - public byte[] buffer; // Current head buffer - public int byteOffset = -BYTE_BLOCK_SIZE; // Current head offset + /** Current head buffer */ + public byte[] buffer; + /** Current head offset */ + public int byteOffset = -BYTE_BLOCK_SIZE; private final Allocator allocator; diff --git a/lucene/core/src/java/org/apache/lucene/util/FuzzySet.java b/lucene/core/src/java/org/apache/lucene/util/FuzzySet.java index 
dec9a6e67b2..a7593767652 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FuzzySet.java +++ b/lucene/core/src/java/org/apache/lucene/util/FuzzySet.java @@ -48,6 +48,11 @@ public class FuzzySet { public static final int FUZZY_SERIALIZATION_VERSION=1; + /** + * Result from {@link FuzzySet#contains(BytesRef)}: + * can never return definitively YES (always MAYBE), + * but can sometimes definitely return NO. + */ public enum ContainsResult { MAYBE, NO }; diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index dc099696b60..cd97d7086e4 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -158,7 +158,7 @@ public final class FST { private final boolean packed; private PackedInts.Reader nodeRefToAddress; - // If arc has this label then that arc is final/accepted + /** If arc has this label then that arc is final/accepted */ public static final int END_LABEL = -1; private boolean allowArrayArcs = true; @@ -174,7 +174,7 @@ public final class FST { // building an FST w/ willPackFST=true: int node; - // To node (ord or address): + /** To node (ord or address) */ public int target; byte flags; @@ -542,8 +542,8 @@ public final class FST { return v; } - // returns true if the node at this address has any - // outgoing arcs + /** returns true if the node at this address has any + * outgoing arcs */ public static boolean targetHasArcs(Arc arc) { return arc.target > 0; } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java index 55823a3f829..0dbc7966eb5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java @@ -767,6 +767,19 @@ public final class Util { } } + /** Just maps each UTF16 unit (char) to the ints in an + * IntsRef. 
*/ + public static IntsRef toUTF16(CharSequence s, IntsRef scratch) { + final int charLimit = s.length(); + scratch.offset = 0; + scratch.length = charLimit; + scratch.grow(charLimit); + for (int idx = 0; idx < charLimit; idx++) { + scratch.ints[idx] = (int) s.charAt(idx); + } + return scratch; + } + /** Decodes the Unicode codepoints from the provided * CharSequence and places them in the provided scratch * IntsRef, which must not be null, returning it. */ diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java index 229b51f8314..6d92351c8c0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java @@ -734,7 +734,7 @@ public class PackedInts { } return new Packed64(in, valueCount, bitsPerValue); default: - throw new AssertionError("Unknwown Writer format: " + format); + throw new AssertionError("Unknown Writer format: " + format); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java b/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java index 9df5afbcdbc..4efee8654ee 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java @@ -20,12 +20,13 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.Norm; import org.apache.lucene.index.RandomIndexWriter; +import 
org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; @@ -36,7 +37,7 @@ import org.apache.lucene.util.LuceneTestCase; public class TestSimilarityProvider extends LuceneTestCase { private Directory directory; - private IndexReader reader; + private DirectoryReader reader; private IndexSearcher searcher; @Override @@ -75,8 +76,9 @@ public class TestSimilarityProvider extends LuceneTestCase { public void testBasics() throws Exception { // sanity check of norms writer // TODO: generalize - byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray(); - byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray(); + AtomicReader slow = new SlowCompositeReaderWrapper(reader); + byte fooNorms[] = (byte[]) slow.normValues("foo").getSource().getArray(); + byte barNorms[] = (byte[]) slow.normValues("bar").getSource().getArray(); for (int i = 0; i < fooNorms.length; i++) { assertFalse(fooNorms[i] == barNorms[i]); } diff --git a/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestLeaveFilesIfTestFails.java b/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestLeaveFilesIfTestFails.java new file mode 100644 index 00000000000..e749b290a02 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestLeaveFilesIfTestFails.java @@ -0,0 +1,49 @@ +package org.apache.lucene.util.junitcompat; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; + +import org.apache.lucene.util._TestUtil; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.JUnitCore; +import org.junit.runner.Result; + +public class TestLeaveFilesIfTestFails extends WithNestedTests { + public TestLeaveFilesIfTestFails() { + super(true); + } + + public static class Nested1 extends WithNestedTests.AbstractNestedTest { + static File file; + public void testDummy() { + file = _TestUtil.getTempDir("leftover"); + file.mkdirs(); + fail(); + } + } + + @Test + public void testLeaveFilesIfTestFails() { + Result r = JUnitCore.runClasses(Nested1.class); + Assert.assertEquals(1, r.getFailureCount()); + Assert.assertTrue(Nested1.file.exists()); + Nested1.file.delete(); + } +} diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractDistinctValuesCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractDistinctValuesCollector.java index a3138a22761..419189b1b5f 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractDistinctValuesCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractDistinctValuesCollector.java @@ -44,6 +44,10 @@ public abstract class AbstractDistinctValuesCollector { public final GROUP_VALUE_TYPE groupValue; diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/CollectedSearchGroup.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/CollectedSearchGroup.java index c793f27224d..63c8871ed30 100644 --- 
a/lucene/grouping/src/java/org/apache/lucene/search/grouping/CollectedSearchGroup.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/CollectedSearchGroup.java @@ -17,7 +17,12 @@ package org.apache.lucene.search.grouping; -/** @lucene.internal */ +import org.apache.lucene.search.FieldComparator; // javadocs + +/** + * Expert: representation of a group in {@link AbstractFirstPassGroupingCollector}, + * tracking the top doc and {@link FieldComparator} slot. + * @lucene.internal */ public class CollectedSearchGroup extends SearchGroup { int topDoc; int comparatorSlot; diff --git a/lucene/module-build.xml b/lucene/module-build.xml index 87baa3d3180..233a0ef53a0 100644 --- a/lucene/module-build.xml +++ b/lucene/module-build.xml @@ -90,6 +90,28 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleConstValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleConstValueSource.java index 5fad61e0bb4..a3cea886254 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleConstValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleConstValueSource.java @@ -24,6 +24,9 @@ import org.apache.lucene.queries.function.docvalues.DoubleDocValues; import java.io.IOException; import java.util.Map; +/** + * Function that returns a constant double value for every document. 
+ */ public class DoubleConstValueSource extends ConstNumberSource { final double constant; private final float fv; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java index 319d755eaa4..0c620203462 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java @@ -28,7 +28,13 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.Map; -/** @lucene.internal */ +/** + * Function that returns {@link TFIDFSimilarity#idf(long, long)} + * for every document. + *

+ * Note that the configured Similarity for the field must be + * a subclass of {@link TFIDFSimilarity} + * @lucene.internal */ public class IDFValueSource extends DocFreqValueSource { public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) { super(field, val, indexedField, indexedBytes); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java index ff693b5c9ab..fde5c5c528a 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java @@ -30,6 +30,10 @@ import java.util.List; import java.util.Map; +/** + * Depending on the boolean value of the ifSource function, + * returns the value of the trueSource or falseSource function. + */ public class IfFunction extends BoolFunction { private final ValueSource ifSource; private final ValueSource trueSource; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxDocValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxDocValueSource.java index 789f047253f..66c58e578a0 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxDocValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxDocValueSource.java @@ -17,6 +17,7 @@ package org.apache.lucene.queries.function.valuesource; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; // javadocs import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.IndexSearcher; @@ -24,6 +25,11 @@ import org.apache.lucene.search.IndexSearcher; import java.io.IOException; import java.util.Map; +/** + * Returns the value of {@link 
IndexReader#maxDoc()} + * for every document. This is the number of documents + * including deletions. + */ public class MaxDocValueSource extends ValueSource { public String name() { return "maxdoc"; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java index 88b357c5b4d..acf454d1e5b 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java @@ -28,6 +28,13 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity; import java.io.IOException; import java.util.Map; +/** + * Function that returns {@link TFIDFSimilarity#decodeNormValue(byte)} + * for every document. + *

+ * Note that the configured Similarity for the field must be + * a subclass of {@link TFIDFSimilarity} + * @lucene.internal */ public class NormValueSource extends ValueSource { protected final String field; public NormValueSource(String field) { diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java index 54e9dac08fb..e9ab075e3b9 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java @@ -30,7 +30,10 @@ import java.io.IOException; import java.util.Map; /** - * TotalTermFreqValueSource returns the total term freq (sum of term freqs across all docuyments). + * SumTotalTermFreqValueSource returns the number of tokens. + * (sum of term freqs across all documents, across all terms). + * Returns -1 if frequencies were omitted for the field, or if + * the codec doesn't support this statistic. * @lucene.internal */ public class SumTotalTermFreqValueSource extends ValueSource { diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java index d8803ace2a7..f0e4a9c8fbb 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java @@ -28,6 +28,13 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.Map; +/** + * Function that returns {@link TFIDFSimilarity#tf(int)} + * for every document. + *

+ * Note that the configured Similarity for the field must be + * a subclass of {@link TFIDFSimilarity} + * @lucene.internal */ public class TFValueSource extends TermFreqValueSource { public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) { super(field, val, indexedField, indexedBytes); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java index eab10bcf835..c2b06542f01 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java @@ -26,6 +26,13 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.Map; +/** + * Function that returns {@link DocsEnum#freq()} for the + * supplied term in every document. + *

+ * If the term does not exist in the document, returns 0. + * If frequencies are omitted, returns 1. + */ public class TermFreqValueSource extends DocFreqValueSource { public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) { super(field, val, indexedField, indexedBytes); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java index 52cede17cc0..65b2abd1af5 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java @@ -28,7 +28,10 @@ import java.io.IOException; import java.util.Map; /** - * TotalTermFreqValueSource returns the total term freq (sum of term freqs across all docuyments). + * TotalTermFreqValueSource returns the total term freq + * (sum of term freqs across all documents). + * Returns -1 if frequencies were omitted for the field, or if + * the codec doesn't support this statistic. 
* @lucene.internal */ public class TotalTermFreqValueSource extends ValueSource { diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java index 7685da23405..1718c1ce213 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java @@ -29,7 +29,9 @@ import org.apache.lucene.util._TestUtil; // a MockRemovesTokensTF, ideally subclassing FilteringTF // (in modules/analysis) -// Randomly injects holes: +/** + * Randomly injects holes (similar to what a stopfilter would do) + */ public final class MockHoleInjectingTokenFilter extends TokenFilter { private final long randomSeed; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java index 2c17c78b3cf..ebc32408ec6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java @@ -27,8 +27,9 @@ import java.io.Reader; /** - * - * + * Wraps a whitespace tokenizer with a filter that sets + * the first token, and odd tokens to posinc=1, and all others + * to 0, encoding the position as pos: XXX in the payload. 
**/ public final class MockPayloadAnalyzer extends Analyzer { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java index 5f34483d32b..38c0dcc3246 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java @@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.codecs.TermsIndexReaderBase; import org.apache.lucene.codecs.TermsIndexWriterBase; +import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader; import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; import org.apache.lucene.index.SegmentReadState; @@ -39,6 +40,10 @@ import org.apache.lucene.util.BytesRef; // TODO: we could make separate base class that can wrapp // any PostingsBaseFormat and make it ord-able... +/** + * Customized version of {@link Lucene40Codec} that uses + * {@link FixedGapTermsIndexWriter}. + */ public class Lucene40WithOrds extends PostingsFormat { public Lucene40WithOrds() { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java index 8a0e9e157d8..468e1b51b73 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java @@ -72,6 +72,9 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat { return new MockIntFactory(blockSize); } + /** + * Encodes blocks as vInts of a fixed block size. 
+ */ public static class MockIntFactory extends IntStreamFactory { private final int blockSize; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java index 8f569561d0e..1a38cb34e0d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java @@ -70,6 +70,10 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat { return getName() + "(baseBlockSize="+ baseBlockSize + ")"; } + /** + * If the first value is <= 3, writes baseBlockSize vInts at once, + * otherwise writes 2*baseBlockSize vInts. + */ public static class MockIntFactory extends IntStreamFactory { private final int baseBlockSize; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java index 8c48f1f1541..ca42debdca2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java @@ -25,7 +25,10 @@ import org.apache.lucene.codecs.sep.IntStreamFactory; import java.io.IOException; -/** @lucene.experimental */ +/** + * Encodes ints directly as vInts with {@link MockSingleIntIndexOutput} + * @lucene.experimental + */ public class MockSingleIntFactory extends IntStreamFactory { @Override public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java 
b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java index e49fd3dbb7e..18248fad5aa 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java @@ -28,7 +28,7 @@ import org.apache.lucene.store.IndexInput; /** Reads IndexInputs written with {@link * MockSingleIntIndexOutput}. NOTE: this class is just for - * demonstration puprposes (it is a very slow way to read a + * demonstration purposes (it is a very slow way to read a * block of ints). * * @lucene.experimental @@ -54,6 +54,9 @@ public class MockSingleIntIndexInput extends IntIndexInput { in.close(); } + /** + * Just reads a vInt directly from the file. + */ public static class Reader extends IntIndexInput.Reader { // clone: private final IndexInput in; diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java b/lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java index 64d54ecb80a..1ea95e12cca 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java @@ -68,7 +68,7 @@ public class AlcoholicMergePolicy extends LogMergePolicy { return info.sizeInBytes(); } - public static enum Drink { + private static enum Drink { Beer(15), Wine(17), Champagne(21), WhiteRussian(22), SingleMalt(30); @@ -77,11 +77,6 @@ public class AlcoholicMergePolicy extends LogMergePolicy { Drink(long drunkFactor) { this.drunkFactor = drunkFactor; } - - public long drunk() { - return drunkFactor; - } - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java index 8e364b338d4..e2fcc2b725f 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java @@ -23,6 +23,10 @@ import java.util.Iterator; import java.util.NoSuchElementException; import java.util.Set; +/** + * A {@link FilterAtomicReader} that exposes only a subset + * of fields from the underlying wrapped reader. + */ public final class FieldFilterAtomicReader extends FilterAtomicReader { private final Set fields; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java index fc98095728f..e99f02d9cca 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java @@ -42,9 +42,15 @@ import org.apache.lucene.util._TestUtil; // - doc blocks? so we can test joins/grouping... // - controlled consistency (NRTMgr) +/** + * Base test class for simulating distributed search across multiple shards. + */ public abstract class ShardSearchingTestBase extends LuceneTestCase { // TODO: maybe SLM should throw this instead of returning null... + /** + * Thrown when the lease for a searcher has expired. 
+ */ public static class SearcherExpiredException extends RuntimeException { public SearcherExpiredException(String message) { super(message); @@ -604,6 +610,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } } + /** + * An IndexSearcher and associated version (lease) + */ protected static class SearcherAndVersion { public final IndexSearcher searcher; public final long version; diff --git a/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java index 5aff1dd43ce..c956f1f6e75 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java @@ -146,6 +146,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper { preventDoubleWrite = value; } + /** + * Enum for controlling hard disk throttling. + * Set via {@link MockDirectoryWrapper#setThrottling(Throttling)} + *

+ * WARNING: can make tests very slow. + */ public static enum Throttling { /** always emulate a slow hard disk. could be very slow! */ ALWAYS, diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/CloseableFile.java b/lucene/test-framework/src/java/org/apache/lucene/util/CloseableFile.java index aa44cfbdf41..7aedb3fec35 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/CloseableFile.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/CloseableFile.java @@ -24,25 +24,30 @@ import java.io.*; */ final class CloseableFile implements Closeable { private final File file; + private final TestRuleMarkFailure failureMarker; - public CloseableFile(File file) { + public CloseableFile(File file, TestRuleMarkFailure failureMarker) { this.file = file; + this.failureMarker = failureMarker; } @Override public void close() throws IOException { - if (file.exists()) { - try { - _TestUtil.rmDir(file); - } catch (IOException e) { - // Ignore the exception from rmDir. - } - - // Re-check. + // only if there were no other test failures. + if (failureMarker.wasSuccessful()) { if (file.exists()) { - throw new IOException( + try { + _TestUtil.rmDir(file); + } catch (IOException e) { + // Ignore the exception from rmDir. + } + + // Re-check. + if (file.exists()) { + throw new IOException( "Could not remove: " + file.getAbsolutePath()); - } + } + } } } } \ No newline at end of file diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/English.java b/lucene/test-framework/src/java/org/apache/lucene/util/English.java index 1f1766f738c..5ff95a4ae2f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/English.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/English.java @@ -18,6 +18,7 @@ package org.apache.lucene.util; */ /** + * Converts numbers to english strings for testing. 
* @lucene.internal */ public final class English { diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/RollingBuffer.java b/lucene/test-framework/src/java/org/apache/lucene/util/RollingBuffer.java index c1ded254eb1..4450d3673a8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/RollingBuffer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/RollingBuffer.java @@ -26,6 +26,9 @@ package org.apache.lucene.util; * @lucene.internal */ public abstract class RollingBuffer { + /** + * Implement to reset an instance + */ public static interface Resettable { public void reset(); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleFieldCacheSanity.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleFieldCacheSanity.java index ea5d632c690..7ad81a581aa 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleFieldCacheSanity.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleFieldCacheSanity.java @@ -1,10 +1,5 @@ package org.apache.lucene.util; -import org.apache.lucene.search.FieldCache; -import org.junit.rules.TestRule; -import org.junit.runner.Description; -import org.junit.runners.model.Statement; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -22,6 +17,30 @@ import org.junit.runners.model.Statement; * limitations under the License. */ +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.FieldCacheSanityChecker; // javadocs +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runners.model.Statement; + +/** + * This rule will fail the test if it has insane field caches. + *

+ * calling assertSaneFieldCaches here isn't as useful as having test + * classes call it directly from the scope where the index readers + * are used, because they could be gc'ed just before this tearDown + * method is called. + *

+ * But it's better than nothing. + *

+ * If you are testing functionality that you know for a fact + * "violates" FieldCache sanity, then you should either explicitly + * call purgeFieldCache at the end of your test method, or refactor + * your Test class so that the inconsistent FieldCache usages are + * isolated in distinct test methods + * + * @see FieldCacheSanityChecker + */ public class TestRuleFieldCacheSanity implements TestRule { @Override @@ -33,18 +52,6 @@ public class TestRuleFieldCacheSanity implements TestRule { Throwable problem = null; try { - // calling assertSaneFieldCaches here isn't as useful as having test - // classes call it directly from the scope where the index readers - // are used, because they could be gc'ed just before this tearDown - // method is called. - // - // But it's better then nothing. - // - // If you are testing functionality that you know for a fact - // "violates" FieldCache sanity, then you should either explicitly - // call purgeFieldCache at the end of your test method, or refactor - // your Test class so that the inconsistent FieldCache usages are - // isolated in distinct test methods LuceneTestCase.assertSaneFieldCaches(d.getDisplayName()); } catch (Throwable t) { problem = t; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleStoreClassName.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleStoreClassName.java index 093ed1dec88..1f17fbb5fff 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleStoreClassName.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleStoreClassName.java @@ -21,6 +21,10 @@ import org.junit.rules.TestRule; import org.junit.runner.Description; import org.junit.runners.model.Statement; +/** + * Stores the suite name so you can retrieve it + * from {@link #getTestClass()} + */ public class TestRuleStoreClassName implements TestRule { private volatile Description description; diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index 0a0fd27f3f6..22837ba6cc8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -94,7 +94,7 @@ public class _TestUtil { try { File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR); f.delete(); - LuceneTestCase.closeAfterSuite(new CloseableFile(f)); + LuceneTestCase.closeAfterSuite(new CloseableFile(f, LuceneTestCase.suiteFailureMarker)); return f; } catch (IOException e) { throw new RuntimeException(e); @@ -136,7 +136,7 @@ public class _TestUtil { rmDir(destDir); destDir.mkdir(); - LuceneTestCase.closeAfterSuite(new CloseableFile(destDir)); + LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker)); while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java index c0cf4ca963a..e770a69ee80 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java @@ -127,6 +127,13 @@ public class AutomatonTestUtil { return code; } + /** + * Lets you retrieve random strings accepted + * by an Automaton. + *

+ * Once created, call {@link #getRandomAcceptedString(Random)} + * to get a new string (in UTF-32 codepoints). + */ public static class RandomAcceptedStrings { private final Map leadsToAccept; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 71a59093e23..3046fc4a642 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -26,6 +26,14 @@ $Id$ ================== 4.0.0 ================== +Versions of Major Components +--------------------- +Apache Tika 1.2 +Carrot2 3.5.0 +Velocity 1.6.4 and Velocity Tools 2.0 +Apache UIMA 2.3.1 +Apache ZooKeeper 3.3.6 + Upgrading from Solr 4.0.0-BETA ---------------------- @@ -63,6 +71,27 @@ Bug Fixes * SOLR-3649: Fixed bug in JavabinLoader that caused deleteById(List ids) to not work in SolrJ (siren) +* SOLR-3730: Rollback is not implemented quite right and can cause corner case fails in + SolrCloud tests. (rmuir, Mark Miller) + +* SOLR-2981: Fixed StatsComponent to no longer return duplicated information + when requesting multiple stats.facet fields. + (Roman Kliewer via hossman) + +Other Changes +---------------------- + +* SOLR-3690: Fixed binary release packages to include dependencies needed for + the solr-test-framework (hossman) + +* SOLR-2857: The /update/json and /update/csv URLs were restored to aid + in the migration of existing clients. (yonik) + +* SOLR-3691: SimplePostTool: Mode for crawling/posting web pages + See http://wiki.apache.org/solr/ExtractingRequestHandler for examples (janhoy) + +* SOLR-3707: Upgrade Solr to Tika 1.2 (janhoy) + ================== 4.0.0-BETA =================== @@ -271,7 +300,6 @@ Other Changes Also, the configuration itself can be passed using the "dataConfig" parameter rather than using a file (this previously worked in debug mode only). When configuration errors are encountered, the error message is returned in XML format. (James Dyer) - * SOLR-3439: Make SolrCell easier to use out of the box. 
Also improves "/browse" to display rich-text documents correctly, along with facets for author and content_type. With the new "content" field, highlighting of body is supported. See also SOLR-3672 for diff --git a/solr/NOTICE.txt b/solr/NOTICE.txt index 4b237d98ebe..b22247b12b0 100644 --- a/solr/NOTICE.txt +++ b/solr/NOTICE.txt @@ -310,12 +310,11 @@ Copyright 2004 Sun Microsystems, Inc. (Rome JAR) Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/) -Copyright (C) 1999-2007 Shigeru Chiba. All Rights Reserved. -(Javassist, MPL licensed: http://www.csg.ci.i.u-tokyo.ac.jp/~chiba/javassist/) - Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis) -Scannotation (C) Bill Burke +Copyright 2012 Kohei Taketa juniversalchardet (http://code.google.com/p/juniversalchardet/) + +Lasse Collin and others, XZ for Java (http://tukaani.org/xz/java.html) ========================================================================= == Language Detection Notices == diff --git a/solr/build.xml b/solr/build.xml index 1bef7607cbf..5764f7b90eb 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -386,8 +386,9 @@ diff --git a/solr/common-build.xml b/solr/common-build.xml index 4bda26913f5..3493cdd5626 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -193,7 +193,7 @@ - + diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index 71d0d860f4f..335f7f7be34 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -20,36 +20,36 @@ - - - + + + - - - - - - + + + + + + - - - - + + + + - - - + + + diff --git a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java index b1995a718fc..cde7793315c 100644 --- a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java +++ 
b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java @@ -64,8 +64,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.producer", "extractedProducer", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.Creation-Date", "extractedDate", - "fmap.AAPL:Keywords", "ignored_a", - "fmap.xmpTPg:NPages", "ignored_a", + "uprefix", "ignored_", "fmap.Author", "extractedAuthor", "fmap.content", "extractedContent", "literal.id", "one", @@ -81,6 +80,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.Author", "extractedAuthor", "fmap.language", "extractedLanguage", "literal.id", "two", + "uprefix", "ignored_", "fmap.content", "extractedContent", "fmap.Last-Modified", "extractedDate" ); @@ -136,6 +136,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.Author", "extractedAuthor", "literal.id", "three", + "uprefix", "ignored_", "fmap.content", "extractedContent", "fmap.language", "extractedLanguage", "fmap.Last-Modified", "extractedDate" @@ -206,6 +207,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.Author", "extractedAuthor", "fmap.content", "extractedContent", "literal.id", "one", + "uprefix", "ignored_", "fmap.language", "extractedLanguage", "literal.extractionLiteralMV", "one", "literal.extractionLiteralMV", "two", @@ -374,9 +376,8 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { loadLocal("extraction/arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.Creation-Date", "extractedDate", - "fmap.AAPL:Keywords", "ignored_a", - "fmap.xmpTPg:NPages", "ignored_a", "fmap.Author", "extractedAuthor", + "uprefix", "ignored_", "fmap.content", "wdf_nocase", "literal.id", "one", "fmap.Last-Modified", 
"extractedDate"); @@ -404,8 +405,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { loadLocal("extraction/password-is-solrcell.docx", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", "fmap.Creation-Date", "extractedDate", - "fmap.AAPL:Keywords", "ignored_a", - "fmap.xmpTPg:NPages", "ignored_a", + "uprefix", "ignored_", "fmap.Author", "extractedAuthor", "fmap.content", "wdf_nocase", "literal.id", "one", @@ -462,8 +462,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.content", "extractedContent", "fmap.language", "extractedLanguage", "fmap.Creation-Date", "extractedDate", - "fmap.AAPL:Keywords", "ignored_a", - "fmap.xmpTPg:NPages", "ignored_a", + "uprefix", "ignored_", "fmap.Last-Modified", "extractedDate"); // Here the literal value should override the Tika-parsed title: @@ -478,8 +477,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.content", "extractedContent", "fmap.language", "extractedLanguage", "fmap.Creation-Date", "extractedDate", - "fmap.AAPL:Keywords", "ignored_a", - "fmap.xmpTPg:NPages", "ignored_a", + "uprefix", "ignored_", "fmap.Last-Modified", "extractedDate"); // Here we mimic the old behaviour where literals are added, not overridden @@ -498,8 +496,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { "fmap.content", "extractedContent", "fmap.language", "extractedLanguage", "fmap.Creation-Date", "extractedDate", - "fmap.AAPL:Keywords", "ignored_a", - "fmap.xmpTPg:NPages", "ignored_a", + "uprefix", "ignored_", "fmap.Last-Modified", "extractedDate"); assertU(commit()); diff --git a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java index e98fd5fcfb2..c5c7c4e1b36 100644 --- a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java +++ 
b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java @@ -125,6 +125,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory { } cacheValue.refCnt--; if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) { + log.info("Closing directory:" + cacheValue.path); directory.close(); byDirectoryCache.remove(directory); byPathCache.remove(cacheValue.path); @@ -194,6 +195,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory { byDirectoryCache.put(directory, newCacheValue); byPathCache.put(fullPath, newCacheValue); + log.info("return new directory for " + fullPath + " forceNew:" + forceNew); } else { cacheValue.refCnt++; } diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index b58546fd6c3..670972f8879 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -1554,7 +1554,7 @@ public final class SolrCore implements SolrInfoMBean { } catch (Throwable e) { // do not allow decref() operations to fail since they are typically called in finally blocks // and throwing another exception would be very unexpected. 
- SolrException.log(log, "Error closing searcher:", e); + SolrException.log(log, "Error closing searcher:" + this, e); } } }; diff --git a/solr/core/src/java/org/apache/solr/handler/CSVRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/CSVRequestHandler.java index 5adce486c30..335edf9781d 100755 --- a/solr/core/src/java/org/apache/solr/handler/CSVRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/CSVRequestHandler.java @@ -29,7 +29,7 @@ public class CSVRequestHandler extends UpdateRequestHandler { public void init(NamedList args) { super.init(args); setAssumeContentType("application/csv"); - log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler"); + // log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler"); } //////////////////////// SolrInfoMBeans methods ////////////////////// diff --git a/solr/core/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java index f4254a20e31..f8869f0276b 100644 --- a/solr/core/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java @@ -29,7 +29,7 @@ public class JsonUpdateRequestHandler extends UpdateRequestHandler { public void init(NamedList args) { super.init(args); setAssumeContentType("application/json"); - log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler"); + // log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler"); } //////////////////////// SolrInfoMBeans methods ////////////////////// diff --git a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java index 72d52263f0a..6b9291fcecd 100644 --- 
a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java +++ b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java @@ -384,7 +384,7 @@ public class SnapPuller { // may be closed core.getDirectoryFactory().doneWithDirectory(oldDirectory); } - doCommit(); + doCommit(isFullCopyNeeded); } replicationStartTime = 0; @@ -533,11 +533,11 @@ public class SnapPuller { return sb; } - private void doCommit() throws IOException { + private void doCommit(boolean isFullCopyNeeded) throws IOException { SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams()); // reboot the writer on the new index and get a new searcher - solrCore.getUpdateHandler().newIndexWriter(true); + solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded); try { // first try to open an NRT searcher so that the new diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java index 1796da7cb1a..1505ce4680f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java @@ -182,8 +182,8 @@ abstract class AbstractStatsValues implements StatsValues { for (Map.Entry e2 : entry.getValue().entrySet()) { nl2.add(e2.getKey(), e2.getValue().getStatsValues()); } - res.add(FACETS, nl); } + res.add(FACETS, nl); return res; } diff --git a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java index 67b43448ed3..6a53ed5e763 100644 --- a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java +++ b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java @@ -74,8 +74,7 @@ public final class DefaultSolrCoreState extends SolrCoreState { } if (indexWriter == null) { - indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", - false, false); + indexWriter = 
createMainIndexWriter(core, "DirectUpdateHandler2", false); } if (refCntWriter == null) { refCntWriter = new RefCounted(indexWriter) { @@ -110,18 +109,28 @@ public final class DefaultSolrCoreState extends SolrCoreState { writerPauseLock.wait(); } catch (InterruptedException e) {} } - + try { if (indexWriter != null) { - try { - log.info("Closing old IndexWriter... core=" + coreName); - indexWriter.close(); - } catch (Throwable t) { - SolrException.log(log, "Error closing old IndexWriter. core=" + coreName, t); + if (!rollback) { + try { + log.info("Closing old IndexWriter... core=" + coreName); + indexWriter.close(); + } catch (Throwable t) { + SolrException.log(log, "Error closing old IndexWriter. core=" + + coreName, t); + } + } else { + try { + log.info("Rollback old IndexWriter... core=" + coreName); + indexWriter.rollback(); + } catch (Throwable t) { + SolrException.log(log, "Error rolling back old IndexWriter. core=" + + coreName, t); + } } } - indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", - false, true); + indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", true); log.info("New IndexWriter is ready to be used."); // we need to null this so it picks up the new writer next get call refCntWriter = null; @@ -174,14 +183,12 @@ public final class DefaultSolrCoreState extends SolrCoreState { @Override public synchronized void rollbackIndexWriter(SolrCore core) throws IOException { - indexWriter.rollback(); newIndexWriter(core, true); } - protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, - boolean removeAllExisting, boolean forceNewDirectory) throws IOException { + protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, boolean forceNewDirectory) throws IOException { return new SolrIndexWriter(name, core.getNewIndexDir(), - core.getDirectoryFactory(), removeAllExisting, core.getSchema(), + core.getDirectoryFactory(), false, core.getSchema(), core.getSolrConfig().indexConfig, 
core.getDeletionPolicy(), core.getCodec(), forceNewDirectory); } diff --git a/solr/core/src/java/org/apache/solr/update/MemOutputStream.java b/solr/core/src/java/org/apache/solr/update/MemOutputStream.java new file mode 100644 index 00000000000..32b459e3cfe --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/MemOutputStream.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update; + +import org.apache.solr.common.util.FastOutputStream; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; + +/** @lucene.internal */ +public class MemOutputStream extends FastOutputStream { + public List buffers = new LinkedList(); + public MemOutputStream(byte[] tempBuffer) { + super(null, tempBuffer, 0); + } + + @Override + public void flush(byte[] arr, int offset, int len) throws IOException { + if (arr == buf && offset==0 && len==buf.length) { + buffers.add(buf); // steal the buffer + buf = new byte[8192]; + } else if (len > 0) { + byte[] newBuf = new byte[len]; + System.arraycopy(arr, offset, newBuf, 0, len); + buffers.add(newBuf); + } + } + + public void writeAll(FastOutputStream fos) throws IOException { + for (byte[] buffer : buffers) { + fos.write(buffer); + } + if (pos > 0) { + fos.write(buf, 0, pos); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java index d59164a9344..56dbca1992a 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java @@ -141,6 +141,8 @@ public class SolrIndexWriter extends IndexWriter { super.rollback(); } finally { isClosed = true; + directoryFactory.release(getDirectory()); + numCloses.incrementAndGet(); } } diff --git a/solr/core/src/java/org/apache/solr/update/TransactionLog.java b/solr/core/src/java/org/apache/solr/update/TransactionLog.java index e2f4a5882f7..c609a59861f 100644 --- a/solr/core/src/java/org/apache/solr/update/TransactionLog.java +++ b/solr/core/src/java/org/apache/solr/update/TransactionLog.java @@ -775,31 +775,3 @@ class ChannelFastInputStream extends FastInputStream { } -class MemOutputStream extends FastOutputStream { - public List buffers = new LinkedList(); - public MemOutputStream(byte[] tempBuffer) { - super(null, tempBuffer, 0); - } - - @Override 
- public void flush(byte[] arr, int offset, int len) throws IOException { - if (arr == buf && offset==0 && len==buf.length) { - buffers.add(buf); // steal the buffer - buf = new byte[8192]; - } else if (len > 0) { - byte[] newBuf = new byte[len]; - System.arraycopy(arr, offset, newBuf, 0, len); - buffers.add(newBuf); - } - } - - public void writeAll(FastOutputStream fos) throws IOException { - for (byte[] buffer : buffers) { - fos.write(buffer); - } - if (pos > 0) { - fos.write(buf, 0, pos); - } - } -} - diff --git a/solr/core/src/java/org/apache/solr/util/FastWriter.java b/solr/core/src/java/org/apache/solr/util/FastWriter.java index 672eb06a4af..363cf223221 100755 --- a/solr/core/src/java/org/apache/solr/util/FastWriter.java +++ b/solr/core/src/java/org/apache/solr/util/FastWriter.java @@ -28,7 +28,7 @@ public class FastWriter extends Writer { // it won't cause double buffering. private static final int BUFSIZE = 8192; protected final Writer sink; - protected final char[] buf; + protected char[] buf; protected int pos; public FastWriter(Writer w) { @@ -69,42 +69,64 @@ public class FastWriter extends Writer { } @Override - public void write(char cbuf[], int off, int len) throws IOException { - int space = buf.length - pos; - if (len < space) { - System.arraycopy(cbuf, off, buf, pos, len); - pos += len; - } else if (len buf.length) { + if (pos>0) { + flush(buf,0,pos); // flush + pos=0; + } + // don't buffer, just write to sink + flush(arr, off, len); + return; + } + + // buffer is too big to fit in the free space, but + // not big enough to warrant writing on its own. + // write whatever we can fit, then flush and iterate. 
+ + System.arraycopy(arr, off, buf, pos, space); flush(buf, 0, buf.length); - pos = len-space; - System.arraycopy(cbuf, off+space, buf, 0, pos); - } else { - flush(buf,0,pos); // flush - pos=0; - // don't buffer, just write to sink - flush(cbuf, off, len); + pos = 0; + off += space; + len -= space; } } @Override public void write(String str, int off, int len) throws IOException { - int space = buf.length - pos; - if (len < space) { - str.getChars(off, off+len, buf, pos); - pos += len; - } else if (len buf.length) { + if (pos>0) { + flush(buf,0,pos); // flush + pos=0; + } + // don't buffer, just write to sink + flush(str, off, len); + return; + } + + // buffer is too big to fit in the free space, but + // not big enough to warrant writing on its own. + // write whatever we can fit, then flush and iterate. + str.getChars(off, off+space, buf, pos); flush(buf, 0, buf.length); - str.getChars(off+space, off+len, buf, 0); - pos = len-space; - } else { - flush(buf,0,pos); // flush - pos=0; - // don't buffer, just write to sink - flush(str, off, len); + pos = 0; + off += space; + len -= space; } } diff --git a/solr/core/src/java/org/apache/solr/util/SimplePostTool.java b/solr/core/src/java/org/apache/solr/util/SimplePostTool.java index efbd2fb85fa..edf8f67e904 100644 --- a/solr/core/src/java/org/apache/solr/util/SimplePostTool.java +++ b/solr/core/src/java/org/apache/solr/util/SimplePostTool.java @@ -17,65 +17,110 @@ package org.apache.solr.util; * limitations under the License. 
*/ +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; import java.io.OutputStream; import java.io.UnsupportedEncodingException; -import java.util.Locale; +import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.HashSet; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import java.util.zip.GZIPInputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.ProtocolException; import java.net.URL; import java.net.URLEncoder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + /** * A simple utility class for posting raw updates to a Solr server, * has a main method so it can be run on the command line. + * View this not as a best-practice code example, but as a standalone + * example built with an explicit purpose of not having external + * jar dependencies. 
*/ public class SimplePostTool { - public static final String DEFAULT_POST_URL = "http://localhost:8983/solr/update"; - public static final String VERSION_OF_THIS_TOOL = "1.5"; + private static final String DEFAULT_POST_URL = "http://localhost:8983/solr/update"; + private static final String VERSION_OF_THIS_TOOL = "1.5"; private static final String DEFAULT_COMMIT = "yes"; private static final String DEFAULT_OPTIMIZE = "no"; private static final String DEFAULT_OUT = "no"; private static final String DEFAULT_AUTO = "no"; - private static final String DEFAULT_RECURSIVE = "no"; - + private static final String DEFAULT_RECURSIVE = "0"; + private static final int DEFAULT_WEB_DELAY = 10; + private static final int MAX_WEB_DEPTH = 10; private static final String DEFAULT_CONTENT_TYPE = "application/xml"; private static final String DEFAULT_FILE_TYPES = "xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log"; - private static final String DATA_MODE_FILES = "files"; - private static final String DATA_MODE_ARGS = "args"; - private static final String DATA_MODE_STDIN = "stdin"; - private static final String DEFAULT_DATA_MODE = DATA_MODE_FILES; + static final String DATA_MODE_FILES = "files"; + static final String DATA_MODE_ARGS = "args"; + static final String DATA_MODE_STDIN = "stdin"; + static final String DATA_MODE_WEB = "web"; + static final String DEFAULT_DATA_MODE = DATA_MODE_FILES; - private static final String TRUE_STRINGS = "true,on,yes,1"; + // Input args + boolean auto = false; + int recursive = 0; + int delay = 0; + String fileTypes; + URL solrUrl; + OutputStream out = null; + String type; + String mode; + boolean commit; + boolean optimize; + String[] args; - private boolean auto = false; - private boolean recursive = false; - private String fileTypes; + private int currentDepth; + + static HashMap mimeMap; + GlobFileFilter globFileFilter; + // Backlog for crawling + List> backlog = new ArrayList>(); + Set visited = new HashSet(); - 
private static HashMap mimeMap; - private GlobFileFilter globFileFilter; - - private static final Set DATA_MODES = new HashSet(); - private static final String USAGE_STRING_SHORT = - "Usage: java [SystemProperties] -jar post.jar [-h|-] [ [...]]"; + static final Set DATA_MODES = new HashSet(); + static final String USAGE_STRING_SHORT = + "Usage: java [SystemProperties] -jar post.jar [-h|-] [ [...]]"; + + // Used in tests to avoid doing actual network traffic + static boolean mockMode = false; + static PageFetcher pageFetcher; static { DATA_MODES.add(DATA_MODE_FILES); DATA_MODES.add(DATA_MODE_ARGS); DATA_MODES.add(DATA_MODE_STDIN); + DATA_MODES.add(DATA_MODE_WEB); mimeMap = new HashMap(); mimeMap.put("xml", "text/xml"); @@ -100,97 +145,196 @@ public class SimplePostTool { mimeMap.put("txt", "text/plain"); mimeMap.put("log", "text/plain"); } - - protected URL solrUrl; + /** + * See usage() for valid command line usage + * @param args the params on the command line + */ public static void main(String[] args) { info("SimplePostTool version " + VERSION_OF_THIS_TOOL); - if (0 < args.length && ("-help".equals(args[0]) || "--help".equals(args[0]) || "-h".equals(args[0]))) { usage(); - return; - } - - OutputStream out = null; - final String type = System.getProperty("type"); - - final String params = System.getProperty("params", ""); - - URL u = null; - try { - u = new URL(System.getProperty("url", SimplePostTool.appendParam(DEFAULT_POST_URL, params))); - } catch (MalformedURLException e) { - fatal("System Property 'url' is not a valid URL: " + u); - } - final SimplePostTool t = new SimplePostTool(u); - - if (isOn(System.getProperty("auto", DEFAULT_AUTO))) { - t.setAuto(true); - } - - if (isOn(System.getProperty("recursive", DEFAULT_RECURSIVE))) { - t.setRecursive(true); - } - - final String mode = System.getProperty("data", DEFAULT_DATA_MODE); - if (! 
DATA_MODES.contains(mode)) { - fatal("System Property 'data' is not valid for this tool: " + mode); - } - - if (isOn(System.getProperty("out", DEFAULT_OUT))) { - out = System.out; - } - - t.setFileTypes(System.getProperty("filetypes", DEFAULT_FILE_TYPES)); - - int numFilesPosted = 0; - - try { - if (DATA_MODE_FILES.equals(mode)) { - if (0 < args.length) { - // Skip posting files if special param "-" given - if (!args[0].equals("-")) { - info("Posting files to base url " + u + (!t.auto?" using content-type "+(type==null?DEFAULT_CONTENT_TYPE:type):"")+".."); - if(t.auto) - info("Entering auto mode. File endings considered are "+t.getFileTypes()); - if(t.recursive) - info("Entering recursive mode"); - numFilesPosted = t.postFiles(args, 0, out, type); - info(numFilesPosted + " files indexed."); - } - } else { - usageShort(); - return; - } - } else if (DATA_MODE_ARGS.equals(mode)) { - if (0 < args.length) { - info("POSTing args to " + u + ".."); - for (String a : args) { - t.postData(SimplePostTool.stringToStream(a), null, out, type); - } - } else { - usageShort(); - return; - } - } else if (DATA_MODE_STDIN.equals(mode)) { - info("POSTing stdin to " + u + ".."); - t.postData(System.in, null, out, type); - } - if (isOn(System.getProperty("commit",DEFAULT_COMMIT))) { - info("COMMITting Solr index changes to " + u + ".."); - t.commit(); - } - if (isOn(System.getProperty("optimize",DEFAULT_OPTIMIZE))) { - info("Performing an OPTIMIZE to " + u + ".."); - t.optimize(); - } - - } catch(RuntimeException e) { - e.printStackTrace(); - fatal("RuntimeException " + e); + } else { + final SimplePostTool t = parseArgsAndInit(args); + t.execute(); } } + /** + * After initialization, call execute to start the post job. + * This method delegates to the correct mode method. 
+ */ + public void execute() { + if (DATA_MODE_FILES.equals(mode) && args.length > 0) { + doFilesMode(); + } else if(DATA_MODE_ARGS.equals(mode) && args.length > 0) { + doArgsMode(); + } else if(DATA_MODE_WEB.equals(mode) && args.length > 0) { + doWebMode(); + } else if(DATA_MODE_STDIN.equals(mode)) { + doStdinMode(); + } else { + usageShort(); + return; + } + + if (commit) commit(); + if (optimize) optimize(); + } + + /** + * Parses incoming arguments and system params and initializes the tool + * @param args the incoming cmd line args + * @return an instance of SimplePostTool + */ + protected static SimplePostTool parseArgsAndInit(String[] args) { + String urlStr = null; + try { + // Parse args + final String mode = System.getProperty("data", DEFAULT_DATA_MODE); + if (! DATA_MODES.contains(mode)) { + fatal("System Property 'data' is not valid for this tool: " + mode); + } + String params = System.getProperty("params", ""); + urlStr = System.getProperty("url", SimplePostTool.appendParam(DEFAULT_POST_URL, params)); + URL url = new URL(urlStr); + boolean auto = isOn(System.getProperty("auto", DEFAULT_AUTO)); + String type = System.getProperty("type"); + // Recursive + int recursive = 0; + String r = System.getProperty("recursive", DEFAULT_RECURSIVE); + try { + recursive = Integer.parseInt(r); + } catch(Exception e) { + if (isOn(r)) + recursive = DATA_MODE_WEB.equals(mode)?1:999; + } + // Delay + int delay = DATA_MODE_WEB.equals(mode) ? DEFAULT_WEB_DELAY : 0; + try { + delay = Integer.parseInt(System.getProperty("delay", ""+delay)); + } catch(Exception e) { } + OutputStream out = isOn(System.getProperty("out", DEFAULT_OUT)) ? 
System.out : null; + String fileTypes = System.getProperty("filetypes", DEFAULT_FILE_TYPES); + boolean commit = isOn(System.getProperty("commit",DEFAULT_COMMIT)); + boolean optimize = isOn(System.getProperty("optimize",DEFAULT_OPTIMIZE)); + + return new SimplePostTool(mode, url, auto, type, recursive, delay, fileTypes, out, commit, optimize, args); + } catch (MalformedURLException e) { + fatal("System Property 'url' is not a valid URL: " + urlStr); + return null; + } + } + + /** + * Constructor which takes in all mandatory input for the tool to work. + * Also see usage() for further explanation of the params. + * @param mode whether to post files, web pages, params or stdin + * @param url the Solr base Url to post to, should end with /update + * @param auto if true, we'll guess type and add resourcename/url + * @param type content-type of the data you are posting + * @param recursive number of levels for file/web mode, or 0 if one file only + * @param delay if recursive then delay will be the wait time between posts + * @param fileTypes a comma separated list of file-name endings to accept for file/web + * @param out an OutputStream to write output to, e.g. 
stdout to print to console + * @param commit if true, will commit at end of posting + * @param optimize if true, will optimize at end of posting + * @param args a String[] of arguments, varies between modes + */ + public SimplePostTool(String mode, URL url, boolean auto, String type, + int recursive, int delay, String fileTypes, OutputStream out, + boolean commit, boolean optimize, String[] args) { + this.mode = mode; + this.solrUrl = url; + this.auto = auto; + this.type = type; + this.recursive = recursive; + this.delay = delay; + this.fileTypes = fileTypes; + this.globFileFilter = getFileFilterFromFileTypes(fileTypes); + this.out = out; + this.commit = commit; + this.optimize = optimize; + this.args = args; + pageFetcher = new PageFetcher(); + } + + public SimplePostTool() {} + + // + // Do some action depending on which mode we have + // + private void doFilesMode() { + currentDepth = 0; + // Skip posting files if special param "-" given + if (!args[0].equals("-")) { + info("Posting files to base url " + solrUrl + (!auto?" using content-type "+(type==null?DEFAULT_CONTENT_TYPE:type):"")+".."); + if(auto) + info("Entering auto mode. 
File endings considered are "+fileTypes); + if(recursive > 0) + info("Entering recursive mode, max depth="+recursive+", delay="+delay+"s"); + int numFilesPosted = postFiles(args, 0, out, type); + info(numFilesPosted + " files indexed."); + } + } + + private void doArgsMode() { + info("POSTing args to " + solrUrl + ".."); + for (String a : args) { + postData(stringToStream(a), null, out, type, solrUrl); + } + } + + private int doWebMode() { + reset(); + int numPagesPosted = 0; + try { + if(type != null) { + fatal("Specifying content-type with \"-Ddata=web\" is not supported"); + } + if (args[0].equals("-")) { + // Skip posting url if special param "-" given + return 0; + } + // Set Extracting handler as default + solrUrl = appendUrlPath(solrUrl, "/extract"); + + info("Posting web pages to Solr url "+solrUrl); + auto=true; + info("Entering auto mode. Indexing pages with content-types corresponding to file endings "+fileTypes); + if(recursive > 0) { + if(recursive > MAX_WEB_DEPTH) { + recursive = MAX_WEB_DEPTH; + warn("Too large recursion depth for web mode, limiting to "+MAX_WEB_DEPTH+"..."); + } + if(delay < DEFAULT_WEB_DELAY) + warn("Never crawl an external web site faster than every 10 seconds, your IP will probably be blocked"); + info("Entering recursive mode, depth="+recursive+", delay="+delay+"s"); + } + numPagesPosted = postWebPages(args, 0, out); + info(numPagesPosted + " web pages indexed."); + } catch(MalformedURLException e) { + fatal("Wrong URL trying to append /extract to "+solrUrl); + } + return numPagesPosted; + } + + private void doStdinMode() { + info("POSTing stdin to " + solrUrl + ".."); + postData(System.in, null, out, type, solrUrl); + } + + private void reset() { + fileTypes = DEFAULT_FILE_TYPES; + globFileFilter = this.getFileFilterFromFileTypes(fileTypes); + backlog = new ArrayList>(); + visited = new HashSet(); + } + + + // + // USAGE + // private static void usageShort() { System.out.println(USAGE_STRING_SHORT+"\n"+ " Please invoke with -h 
option for extended usage help."); @@ -200,11 +344,12 @@ public class SimplePostTool { System.out.println (USAGE_STRING_SHORT+"\n\n" + "Supported System Properties and their defaults:\n"+ - " -Ddata=files|args|stdin (default=" + DEFAULT_DATA_MODE + ")\n"+ + " -Ddata=files|web|args|stdin (default=" + DEFAULT_DATA_MODE + ")\n"+ " -Dtype= (default=" + DEFAULT_CONTENT_TYPE + ")\n"+ " -Durl= (default=" + DEFAULT_POST_URL + ")\n"+ " -Dauto=yes|no (default=" + DEFAULT_AUTO + ")\n"+ - " -Drecursive=yes|no (default=" + DEFAULT_RECURSIVE + ")\n"+ + " -Drecursive=yes|no| (default=" + DEFAULT_RECURSIVE + ")\n"+ + " -Ddelay= (default=0 for files, 10 for web)\n"+ " -Dfiletypes=[,,...] (default=" + DEFAULT_FILE_TYPES + ")\n"+ " -Dparams=\"=[&=...]\" (values must be URL-encoded)\n"+ " -Dcommit=yes|no (default=" + DEFAULT_COMMIT + ")\n"+ @@ -212,11 +357,12 @@ public class SimplePostTool { " -Dout=yes|no (default=" + DEFAULT_OUT + ")\n\n"+ "This is a simple command line tool for POSTing raw data to a Solr\n"+ "port. Data can be read from files specified as commandline args,\n"+ - "as raw commandline arg strings, or via STDIN.\n"+ + "URLs specified as args, as raw commandline arg strings or via STDIN.\n"+ "Examples:\n"+ " java -jar post.jar *.xml\n"+ " java -Ddata=args -jar post.jar '42'\n"+ " java -Ddata=stdin -jar post.jar < hd.xml\n"+ + " java -Ddata=web -jar post.jar http://example.com/\n"+ " java -Dtype=text/csv -jar post.jar *.csv\n"+ " java -Dtype=application/json -jar post.jar *.json\n"+ " java -Durl=http://localhost:8983/solr/update/extract -Dparams=literal.id=a -Dtype=application/pdf -jar post.jar a.pdf\n"+ @@ -228,13 +374,10 @@ public class SimplePostTool { "or optimize should be executed, and whether the response should\n"+ "be written to STDOUT. If auto=yes the tool will try to set type\n"+ "and url automatically from file name. 
When posting rich documents\n"+ - "the file name will be propagated as \"resource.name\" and also used as \"literal.id\".\n" + - "You may override these or any other request parameter through the -Dparams property.\n"+ - "If you want to do a commit only, use \"-\" as argument."); - } - - private static boolean isOn(String property) { - return(TRUE_STRINGS.indexOf(property) >= 0); + "the file name will be propagated as \"resource.name\" and also used\n"+ + "as \"literal.id\". You may override these or any other request parameter\n"+ + "through the -Dparams property. To do a commit only, use \"-\" as argument.\n"+ + "The web mode is a simple crawler following links within domain, default delay=10s."); } /** Post all filenames provided in args @@ -244,7 +387,8 @@ public class SimplePostTool { * @param type default content-type to use when posting (may be overridden in auto mode) * @return number of files posted * */ - int postFiles(String [] args,int startIndexInArgs, OutputStream out, String type) { + public int postFiles(String [] args,int startIndexInArgs, OutputStream out, String type) { + reset(); int filesPosted = 0; for (int j = startIndexInArgs; j < args.length; j++) { File srcFile = new File(args[j]); @@ -258,7 +402,7 @@ public class SimplePostTool { String fileGlob = srcFile.getName(); GlobFileFilter ff = new GlobFileFilter(fileGlob, false); File[] files = parent.listFiles(ff); - if(files.length == 0) { + if(files == null || files.length == 0) { warn("No files or directories matching "+srcFile); continue; } @@ -268,32 +412,255 @@ public class SimplePostTool { return filesPosted; } + /** Post all filenames provided in args + * @param files array of Files + * @param startIndexInArgs offset to start + * @param out output stream to post data to + * @param type default content-type to use when posting (may be overridden in auto mode) + * @return number of files posted + * */ + public int postFiles(File[] files, int startIndexInArgs, OutputStream out, String type) 
{ + reset(); + int filesPosted = 0; + for (File srcFile : files) { + if(srcFile.isDirectory() && srcFile.canRead()) { + filesPosted += postDirectory(srcFile, out, type); + } else if (srcFile.isFile() && srcFile.canRead()) { + filesPosted += postFiles(new File[] {srcFile}, out, type); + } else { + File parent = srcFile.getParentFile(); + if(parent == null) parent = new File("."); + String fileGlob = srcFile.getName(); + GlobFileFilter ff = new GlobFileFilter(fileGlob, false); + File[] fileList = parent.listFiles(ff); + if(fileList == null || fileList.length == 0) { + warn("No files or directories matching "+srcFile); + continue; + } + filesPosted += postFiles(fileList, out, type); + } + } + return filesPosted; + } + + /** + * Posts a whole directory + * @return number of files posted total + */ private int postDirectory(File dir, OutputStream out, String type) { if(dir.isHidden() && !dir.getName().equals(".")) return(0); - info("Indexing directory "+dir.getPath()); + info("Indexing directory "+dir.getPath()+" ("+dir.listFiles(globFileFilter).length+" files, depth="+currentDepth+")"); int posted = 0; posted += postFiles(dir.listFiles(globFileFilter), out, type); - if(recursive) { + if(recursive > currentDepth) { for(File d : dir.listFiles()) { - if(d.isDirectory()) + if(d.isDirectory()) { + currentDepth++; posted += postDirectory(d, out, type); + currentDepth--; + } } } return posted; } + /** + * Posts a list of file names + * @return number of files posted + */ int postFiles(File[] files, OutputStream out, String type) { int filesPosted = 0; for(File srcFile : files) { - if(!srcFile.isFile() || srcFile.isHidden()) - continue; - postFile(srcFile, out, type); - filesPosted++; + try { + if(!srcFile.isFile() || srcFile.isHidden()) + continue; + postFile(srcFile, out, type); + Thread.sleep(delay * 1000); + filesPosted++; + } catch (InterruptedException e) { + throw new RuntimeException(); + } } return filesPosted; } + /** + * This method takes as input a list of start 
URL strings for crawling, + * adds each one to a backlog and then starts crawling + * @param args the raw input args from main() + * @param startIndexInArgs offset for where to start + * @param out outputStream to write results to + * @return the number of web pages posted + */ + public int postWebPages(String[] args, int startIndexInArgs, OutputStream out) { + reset(); + LinkedHashSet s = new LinkedHashSet(); + for (int j = startIndexInArgs; j < args.length; j++) { + try { + URL u = new URL(normalizeUrlEnding(args[j])); + s.add(u); + } catch(MalformedURLException e) { + warn("Skipping malformed input URL: "+args[j]); + } + } + // Add URLs to level 0 of the backlog and start recursive crawling + backlog.add(s); + return webCrawl(0, out); + } + + /** + * Normalizes a URL string by removing anchor part and trailing slash + * @return the normalized URL string + */ + protected static String normalizeUrlEnding(String link) { + if(link.indexOf("#") > -1) + link = link.substring(0,link.indexOf("#")); + if(link.endsWith("?")) + link = link.substring(0,link.length()-1); + if(link.endsWith("/")) + link = link.substring(0,link.length()-1); + return link; + } + + /** + * A very simple crawler, pulling URLs to fetch from a backlog and then + * recurses N levels deep if recursive>0. Links are parsed from HTML + * through first getting an XHTML version using SolrCell with extractOnly, + * and followed if they are local. The crawler pauses for a default delay + * of 10 seconds between each fetch, this can be configured in the delay + * variable. 
This is only meant for test purposes, as it does not respect + * robots or anything else fancy :) + * @param level which level to crawl + * @param out output stream to write to + * @return number of pages crawled on this level and below + */ + protected int webCrawl(int level, OutputStream out) { + int numPages = 0; + LinkedHashSet stack = backlog.get(level); + int rawStackSize = stack.size(); + stack.removeAll(visited); + int stackSize = stack.size(); + LinkedHashSet subStack = new LinkedHashSet(); + info("Entering crawl at level "+level+" ("+rawStackSize+" links total, "+stackSize+" new)"); + for(URL u : stack) { + try { + visited.add(u); + PageFetcherResult result = pageFetcher.readPageFromUrl(u); + if(result.httpStatus == 200) { + u = (result.redirectUrl != null) ? result.redirectUrl : u; + URL postUrl = new URL(appendParam(solrUrl.toString(), + "literal.id="+URLEncoder.encode(u.toString(),"UTF-8") + + "&literal.url="+URLEncoder.encode(u.toString(),"UTF-8"))); + boolean success = postData(new ByteArrayInputStream(result.content), null, out, result.contentType, postUrl); + if (success) { + info("POSTed web resource "+u+" (depth: "+level+")"); + Thread.sleep(delay * 1000); + numPages++; + // Pull links from HTML pages only + if(recursive > level && result.contentType.equals("text/html")) { + Set children = pageFetcher.getLinksFromWebPage(u, new ByteArrayInputStream(result.content), result.contentType, postUrl); + subStack.addAll(children); + } + } else { + warn("An error occurred while posting "+u); + } + } else { + warn("The URL "+u+" returned a HTTP result status of "+result.httpStatus); + } + } catch (IOException e) { + warn("Caught exception when trying to open connection to "+u+": "+e.getMessage()); + } catch (InterruptedException e) { + throw new RuntimeException(); + } + } + if(!subStack.isEmpty()) { + backlog.add(subStack); + numPages += webCrawl(level+1, out); + } + return numPages; + } + + /** + * Reads an input stream into a byte array + * @param is 
the input stream + * @return the byte array + * @throws IOException + */ + protected byte[] inputStreamToByteArray(InputStream is) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int next = is.read(); + while (next > -1) { + bos.write(next); + next = is.read(); + } + bos.flush(); + is.close(); + return bos.toByteArray(); + } + + /** + * Computes the full URL based on a base url and a possibly relative link found + * in the href param of an HTML anchor. + * @param baseUrl the base url from where the link was found + * @param link the absolute or relative link + * @return the string version of the full URL + */ + protected String computeFullUrl(URL baseUrl, String link) { + if(link == null || link.length() == 0) { + return null; + } + if(!link.startsWith("http")) { + if(link.startsWith("/")) { + link = baseUrl.getProtocol() + "://" + baseUrl.getAuthority() + link; + } else { + if(link.contains(":")) { + return null; // Skip non-relative URLs + } + String path = baseUrl.getPath(); + if(!path.endsWith("/")) { + int sep = path.lastIndexOf("/"); + String file = path.substring(sep+1); + if(file.contains(".") || file.contains("?")) + path = path.substring(0,sep); + } + link = baseUrl.getProtocol() + "://" + baseUrl.getAuthority() + path + "/" + link; + } + } + link = normalizeUrlEnding(link); + String l = link.toLowerCase(Locale.ROOT); + // Simple brute force skip images + if(l.endsWith(".jpg") || l.endsWith(".jpeg") || l.endsWith(".png") || l.endsWith(".gif")) { + return null; // Skip images + } + return link; + } + + /** + * Uses the mime-type map to reverse lookup whether the file ending for our type + * is supported by the fileTypes option + * @param type what content-type to lookup + * @return true if this is a supported content type + */ + protected boolean typeSupported(String type) { + for(String key : mimeMap.keySet()) { + if(mimeMap.get(key).equals(type)) { + if(fileTypes.contains(key)) + return true; + } + } + return false; + } 
+ + /** + * Tests if a string is either "true", "on", "yes" or "1" + * @param property the string to test + * @return true if "on" + */ + protected static boolean isOn(String property) { + return("true,on,yes,1".indexOf(property) > -1); + } + static void warn(String msg) { System.err.println("SimplePostTool: WARNING: " + msg); } @@ -304,21 +671,14 @@ public class SimplePostTool { static void fatal(String msg) { System.err.println("SimplePostTool: FATAL: " + msg); - System.exit(1); - } - - /** - * Constructs an instance for posting data to the specified Solr URL - * (ie: "http://localhost:8983/solr/update") - */ - public SimplePostTool(URL solrUrl) { - this.solrUrl = solrUrl; + System.exit(2); } /** * Does a simple commit operation */ public void commit() { + info("COMMITting Solr index changes to " + solrUrl + ".."); doGet(appendParam(solrUrl.toString(), "commit=true")); } @@ -326,9 +686,16 @@ public class SimplePostTool { * Does a simple optimize operation */ public void optimize() { + info("Performing an OPTIMIZE to " + solrUrl + ".."); doGet(appendParam(solrUrl.toString(), "optimize=true")); } + /** + * Appends a URL query parameter to a URL + * @param url the original URL + * @param param the parameter(s) to append, separated by "&" + * @return the string version of the resulting URL + */ public static String appendParam(String url, String param) { String[] pa = param.split("&"); for(String p : pa) { @@ -360,13 +727,12 @@ public class SimplePostTool { // Default handler } else { // SolrCell - String urlStr = url.getProtocol() + "://" + url.getAuthority() + url.getPath() + "/extract" + (url.getQuery() != null ? 
"?"+url.getQuery() : ""); + String urlStr = appendUrlPath(solrUrl, "/extract").toString(); if(urlStr.indexOf("resource.name")==-1) urlStr = appendParam(urlStr, "resource.name=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8")); if(urlStr.indexOf("literal.id")==-1) urlStr = appendParam(urlStr, "literal.id=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8")); url = new URL(urlStr); -// info("Indexing to ExtractingRequestHandler with URL "+url); } } else { warn("Skipping "+file.getName()+". Unsupported file type for auto mode."); @@ -390,7 +756,23 @@ public class SimplePostTool { } } - private String guessType(File file) { + /** + * Appends to the path of the URL + * @param url the URL + * @param append the path to append + * @return the final URL version + * @throws MalformedURLException + */ + protected static URL appendUrlPath(URL url, String append) throws MalformedURLException { + return new URL(url.getProtocol() + "://" + url.getAuthority() + url.getPath() + append + (url.getQuery() != null ? 
"?"+url.getQuery() : "")); + } + + /** + * Guesses the type of a file, based on file name suffix + * @param file the file + * @return the content-type guessed + */ + protected static String guessType(File file) { String name = file.getName(); String suffix = name.substring(name.lastIndexOf(".")+1); return mimeMap.get(suffix.toLowerCase(Locale.ROOT)); @@ -412,6 +794,7 @@ public class SimplePostTool { */ public static void doGet(URL url) { try { + if(mockMode) return; HttpURLConnection urlc = (HttpURLConnection) url.openConnection(); if (HttpURLConnection.HTTP_OK != urlc.getResponseCode()) { warn("Solr returned an error #" + urlc.getResponseCode() + @@ -422,15 +805,14 @@ public class SimplePostTool { } } - public void postData(InputStream data, Integer length, OutputStream output, String type) { - postData(data, length, output, type, solrUrl); - } - /** * Reads data from the data stream and posts it to solr, * writes to the response to output + * @return true if success */ - public void postData(InputStream data, Integer length, OutputStream output, String type, URL url) { + public boolean postData(InputStream data, Integer length, OutputStream output, String type, URL url) { + if(mockMode) return true; + boolean success = true; if(type == null) type = DEFAULT_CONTENT_TYPE; HttpURLConnection urlc = null; @@ -441,7 +823,6 @@ public class SimplePostTool { urlc.setRequestMethod("POST"); } catch (ProtocolException e) { fatal("Shouldn't happen: HttpURLConnection doesn't support POST??"+e); - } urlc.setDoOutput(true); urlc.setDoInput(true); @@ -453,6 +834,7 @@ public class SimplePostTool { } catch (IOException e) { fatal("Connection error (is Solr running at " + solrUrl + " ?): " + e); + success = false; } OutputStream out = null; @@ -461,6 +843,7 @@ public class SimplePostTool { pipe(data, out); } catch (IOException e) { fatal("IOException while posting data: " + e); + success = false; } finally { try { if(out!=null) out.close(); } catch (IOException x) { /*NOOP*/ } } @@ 
-470,12 +853,14 @@ public class SimplePostTool { if (HttpURLConnection.HTTP_OK != urlc.getResponseCode()) { warn("Solr returned an error #" + urlc.getResponseCode() + " " + urlc.getResponseMessage()); + success = false; } in = urlc.getInputStream(); pipe(in, output); } catch (IOException e) { warn("IOException while reading response: " + e); + success = false; } finally { try { if(in!=null) in.close(); } catch (IOException x) { /*NOOP*/ } } @@ -483,8 +868,14 @@ public class SimplePostTool { } finally { if(urlc!=null) urlc.disconnect(); } + return success; } + /** + * Converts a string to an input stream + * @param s the string + * @return the input stream + */ public static InputStream stringToStream(String s) { InputStream is = null; try { @@ -508,36 +899,64 @@ public class SimplePostTool { if (null != dest) dest.flush(); } - public boolean isAuto() { - return auto; - } - - public void setAuto(boolean auto) { - this.auto = auto; - } - - public boolean isRecursive() { - return recursive; - } - - public void setRecursive(boolean recursive) { - this.recursive = recursive; - } - - public String getFileTypes() { - return fileTypes; - } - - public void setFileTypes(String fileTypes) { - this.fileTypes = fileTypes; + public GlobFileFilter getFileFilterFromFileTypes(String fileTypes) { String glob; if(fileTypes.equals("*")) glob = ".*"; else glob = "^.*\\.(" + fileTypes.replace(",", "|") + ")$"; - this.globFileFilter = new GlobFileFilter(glob, true); + return new GlobFileFilter(glob, true); } + // + // Utility methods for XPath handing + // + + /** + * Gets all nodes matching an XPath + */ + public static NodeList getNodesFromXP(Node n, String xpath) throws XPathExpressionException { + XPathFactory factory = XPathFactory.newInstance(); + XPath xp = factory.newXPath(); + XPathExpression expr = xp.compile(xpath); + return (NodeList) expr.evaluate(n, XPathConstants.NODESET); + } + + /** + * Gets the string content of the matching an XPath + * @param n the node (or doc) + * 
@param xpath the xpath string + * @param concatAll if true, text from all matching nodes will be concatenated, else only the first returned + */ + public static String getXP(Node n, String xpath, boolean concatAll) + throws XPathExpressionException { + NodeList nodes = getNodesFromXP(n, xpath); + StringBuffer sb = new StringBuffer(); + if (nodes.getLength() > 0) { + for(int i = 0; i < nodes.getLength() ; i++) { + sb.append(nodes.item(i).getNodeValue() + " "); + if(!concatAll) break; + } + return sb.toString().trim(); + } else + return ""; + } + + /** + * Takes a string as input and returns a DOM + */ + public static Document makeDom(String in, String inputEncoding) throws SAXException, IOException, + ParserConfigurationException { + InputStream is = new ByteArrayInputStream(in + .getBytes(inputEncoding)); + Document dom = DocumentBuilderFactory.newInstance() + .newDocumentBuilder().parse(is); + return dom; + } + + /** + * Inner class to filter files based on glob wildcards + */ class GlobFileFilter implements FileFilter { private String _pattern; @@ -571,4 +990,170 @@ public class SimplePostTool { return p.matcher(file.getName()).find(); } } + + // + // Simple crawler class which can fetch a page and check for robots.txt + // + class PageFetcher { + Map> robotsCache; + final String DISALLOW = "Disallow:"; + + public PageFetcher() { + robotsCache = new HashMap>(); + } + + public PageFetcherResult readPageFromUrl(URL u) { + PageFetcherResult res = new PageFetcherResult(); + try { + if (isDisallowedByRobots(u)) { + warn("The URL "+u+" is disallowed by robots.txt and will not be crawled."); + res.httpStatus = 403; + visited.add(u); + return res; + } + res.httpStatus = 404; + HttpURLConnection conn = (HttpURLConnection) u.openConnection(); + conn.setRequestProperty("User-Agent", "SimplePostTool-crawler/"+VERSION_OF_THIS_TOOL+" (http://lucene.apache.org/solr/)"); + conn.setRequestProperty("Accept-Encoding", "gzip, deflate"); + conn.connect(); + res.httpStatus = 
conn.getResponseCode(); + if(!normalizeUrlEnding(conn.getURL().toString()).equals(normalizeUrlEnding(u.toString()))) { + info("The URL "+u+" caused a redirect to "+conn.getURL()); + u = conn.getURL(); + res.redirectUrl = u; + visited.add(u); + } + if(res.httpStatus == 200) { + // Raw content type of form "text/html; encoding=utf-8" + String rawContentType = conn.getContentType(); + String type = rawContentType.split(";")[0]; + if(typeSupported(type)) { + String encoding = conn.getContentEncoding(); + InputStream is; + if (encoding != null && encoding.equalsIgnoreCase("gzip")) { + is = new GZIPInputStream(conn.getInputStream()); + } else if (encoding != null && encoding.equalsIgnoreCase("deflate")) { + is = new InflaterInputStream(conn.getInputStream(), new Inflater(true)); + } else { + is = conn.getInputStream(); + } + + // Read into memory, so that we later can pull links from the page without re-fetching + res.content = inputStreamToByteArray(is); + is.close(); + } else { + warn("Skipping URL with unsupported type "+type); + res.httpStatus = 415; + } + } + } catch(IOException e) { + warn("IOException when reading page from url "+u+": "+e.getMessage()); + } + return res; + } + + public boolean isDisallowedByRobots(URL url) { + String host = url.getHost(); + String strRobot = url.getProtocol() + "://" + host + "/robots.txt"; + List disallows = robotsCache.get(host); + if(disallows == null) { + disallows = new ArrayList(); + URL urlRobot; + try { + urlRobot = new URL(strRobot); + disallows = parseRobotsTxt(urlRobot.openStream()); + } catch (MalformedURLException e) { + return true; // We cannot trust this robots URL, should not happen + } catch (IOException e) { + // There is no robots.txt, will cache an empty disallow list + } + } + + robotsCache.put(host, disallows); + + String strURL = url.getFile(); + for (String path : disallows) { + if (path.equals("/") || strURL.indexOf(path) == 0) + return true; + } + return false; + } + + /** + * Very simple robots.txt 
parser which obeys all Disallow lines regardless + * of user agent or whether there are valid Allow: lines. + * @param is Input stream of the robots.txt file + * @return a list of disallow paths + * @throws IOException if problems reading the stream + */ + protected List parseRobotsTxt(InputStream is) throws IOException { + List disallows = new ArrayList(); + BufferedReader r = new BufferedReader(new InputStreamReader(is, "UTF-8")); + String l; + while((l = r.readLine()) != null) { + String[] arr = l.split("#"); + if(arr.length == 0) continue; + l = arr[0].trim(); + if(l.startsWith(DISALLOW)) { + l = l.substring(DISALLOW.length()).trim(); + if(l.length() == 0) continue; + disallows.add(l); + } + } + is.close(); + return disallows; + } + + /** + * Finds links on a web page, using /extract?extractOnly=true + * @param u the URL of the web page + * @param is the input stream of the page + * @param type the content-type + * @param postUrl the URL (typically /solr/extract) in order to pull out links + * @return a set of URLs parsed from the page + */ + protected Set getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) { + Set l = new HashSet(); + URL url = null; + try { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + URL extractUrl = new URL(appendParam(postUrl.toString(), "extractOnly=true")); + boolean success = postData(is, null, os, type, extractUrl); + if(success) { + String rawXml = os.toString("UTF-8"); + Document d = makeDom(rawXml, "UTF-8"); + String innerXml = getXP(d, "/response/str/text()[1]", false); + d = makeDom(innerXml, "UTF-8"); + NodeList links = getNodesFromXP(d, "/html/body//a/@href"); + for(int i = 0; i < links.getLength(); i++) { + String link = links.item(i).getTextContent(); + link = computeFullUrl(u, link); + if(link == null) + continue; + url = new URL(link); + if(url.getAuthority() == null || !url.getAuthority().equals(u.getAuthority())) + continue; + l.add(url); + } + } + } catch (MalformedURLException e) { + 
warn("Malformed URL "+url); + } catch (IOException e) { + warn("IOException opening URL "+url+": "+e.getMessage()); + } catch (Exception e) { + throw new RuntimeException(); + } + return l; + } + } + + /** + * Utility class to hold the result from a page fetch + */ + public class PageFetcherResult { + int httpStatus = 200; + String contentType = "text/html"; + URL redirectUrl = null; + byte[] content; + } } diff --git a/solr/core/src/test-files/exampledocs/example.html b/solr/core/src/test-files/exampledocs/example.html new file mode 100644 index 00000000000..5732f6214bc --- /dev/null +++ b/solr/core/src/test-files/exampledocs/example.html @@ -0,0 +1,49 @@ + + + Welcome to Solr + + +

+ Here is some text +

+
Here is some text in a div
+
This has a link.
+News + + + + diff --git a/solr/core/src/test-files/exampledocs/example.txt b/solr/core/src/test-files/exampledocs/example.txt new file mode 100644 index 00000000000..0c9928b9e26 --- /dev/null +++ b/solr/core/src/test-files/exampledocs/example.txt @@ -0,0 +1,3 @@ +Example text document + +This is a simple example for a plain text document, indexed to Solr \ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index 7a7ba7d547a..9b00044669c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -54,7 +54,7 @@ --> 10 - single + native true diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java index f801edfbccf..96a2317b7c6 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -228,32 +228,35 @@ public class StatsComponentTest extends AbstractSolrTestCase { } public void doTestFacetStatisticsResult(String f) throws Exception { - assertU(adoc("id", "1", f, "10", "active_s", "true")); - assertU(adoc("id", "2", f, "20", "active_s", "true")); - assertU(adoc("id", "3", f, "30", "active_s", "false")); - assertU(adoc("id", "4", f, "40", "active_s", "false")); + assertU(adoc("id", "1", f, "10", "active_s", "true", "other_s", "foo")); + assertU(adoc("id", "2", f, "20", "active_s", "true", "other_s", "bar")); + assertU(adoc("id", "3", f, "30", "active_s", "false", "other_s", "foo")); + assertU(adoc("id", "4", f, "40", "active_s", "false", "other_s", "foo")); assertU(commit()); - assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","indent","true") - , 
"//lst[@name='true']/double[@name='min'][.='10.0']" - , "//lst[@name='true']/double[@name='max'][.='20.0']" - , "//lst[@name='true']/double[@name='sum'][.='30.0']" - , "//lst[@name='true']/long[@name='count'][.='2']" - , "//lst[@name='true']/long[@name='missing'][.='0']" - , "//lst[@name='true']/double[@name='sumOfSquares'][.='500.0']" - , "//lst[@name='true']/double[@name='mean'][.='15.0']" - , "//lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']" + final String pre = "//lst[@name='stats_fields']/lst[@name='"+f+"']/lst[@name='facets']/lst[@name='active_s']"; + + assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","stats.facet","other_s","indent","true") + , "*[count("+pre+")=1]" + , pre+"/lst[@name='true']/double[@name='min'][.='10.0']" + , pre+"/lst[@name='true']/double[@name='max'][.='20.0']" + , pre+"/lst[@name='true']/double[@name='sum'][.='30.0']" + , pre+"/lst[@name='true']/long[@name='count'][.='2']" + , pre+"/lst[@name='true']/long[@name='missing'][.='0']" + , pre+"/lst[@name='true']/double[@name='sumOfSquares'][.='500.0']" + , pre+"/lst[@name='true']/double[@name='mean'][.='15.0']" + , pre+"/lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']" ); assertQ("test value for active_s=false", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s") - , "//lst[@name='false']/double[@name='min'][.='30.0']" - , "//lst[@name='false']/double[@name='max'][.='40.0']" - , "//lst[@name='false']/double[@name='sum'][.='70.0']" - , "//lst[@name='false']/long[@name='count'][.='2']" - , "//lst[@name='false']/long[@name='missing'][.='0']" - , "//lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']" - , "//lst[@name='false']/double[@name='mean'][.='35.0']" - , "//lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']" + , pre+"/lst[@name='false']/double[@name='min'][.='30.0']" + , pre+"/lst[@name='false']/double[@name='max'][.='40.0']" + , 
pre+"/lst[@name='false']/double[@name='sum'][.='70.0']" + , pre+"/lst[@name='false']/long[@name='count'][.='2']" + , pre+"/lst[@name='false']/long[@name='missing'][.='0']" + , pre+"/lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']" + , pre+"/lst[@name='false']/double[@name='mean'][.='35.0']" + , pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']" ); } diff --git a/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java b/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java new file mode 100644 index 00000000000..b82c8ebaae6 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java @@ -0,0 +1,237 @@ +package org.apache.solr.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.util.SimplePostTool.PageFetcher; +import org.apache.solr.util.SimplePostTool.PageFetcherResult; +import org.junit.Before; +import org.junit.Test; + +public class SimplePostToolTest extends SolrTestCaseJ4 { + SimplePostTool t_file, t_file_auto, t_file_rec, t_web, t_test; + PageFetcher pf; + + @Before + public void setUp() throws Exception { + super.setUp(); + String[] args = {"-"}; + System.setProperty("data", "files"); + t_file = SimplePostTool.parseArgsAndInit(args); + + System.setProperty("auto", "yes"); + t_file_auto = SimplePostTool.parseArgsAndInit(args); + + System.setProperty("recursive", "yes"); + t_file_rec = SimplePostTool.parseArgsAndInit(args); + + System.setProperty("data", "web"); + t_web = SimplePostTool.parseArgsAndInit(args); + + System.setProperty("params", "param1=foo¶m2=bar"); + t_test = SimplePostTool.parseArgsAndInit(args); + + pf = new MockPageFetcher(); + SimplePostTool.pageFetcher = pf; + SimplePostTool.mockMode = true; + } + + @Test + public void testParseArgsAndInit() { + assertEquals(false, t_file.auto); + assertEquals(true, t_file_auto.auto); + assertEquals(0, t_file_auto.recursive); + assertEquals(999, t_file_rec.recursive); + assertEquals(true, t_file.commit); + assertEquals(false, t_file.optimize); + assertEquals(null, t_file.out); + + assertEquals(1, t_web.recursive); + assertEquals(10, t_web.delay); + + assertNotNull(t_test.solrUrl); + } + + @Test + public void testNormalizeUrlEnding() { + assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/")); + assertEquals("http://example.com", 
SimplePostTool.normalizeUrlEnding("http://example.com/#foo?bar=baz")); + assertEquals("http://example.com/index.html", SimplePostTool.normalizeUrlEnding("http://example.com/index.html#hello")); + } + + @Test + public void testComputeFullUrl() throws MalformedURLException { + assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/"), "/index.html")); + assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/foo/bar/"), "/index.html")); + assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo.htm?baz#hello"), "fil.html")); +// TODO: How to know what is the base if URL path ends with "foo"?? +// assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo?baz#hello"), "fil.html")); + assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "fil.jpg")); + assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "mailto:hello@foo.bar")); + assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "ftp://server/file")); + } + + @Test + public void testTypeSupported() { + assertTrue(t_web.typeSupported("application/pdf")); + assertTrue(t_web.typeSupported("text/xml")); + assertFalse(t_web.typeSupported("text/foo")); + + t_web.fileTypes = "doc,xls,ppt"; + t_web.globFileFilter = t_web.getFileFilterFromFileTypes(t_web.fileTypes); + assertFalse(t_web.typeSupported("application/pdf")); + assertTrue(t_web.typeSupported("application/msword")); + } + + @Test + public void testIsOn() { + assertTrue(SimplePostTool.isOn("true")); + assertTrue(SimplePostTool.isOn("1")); + assertFalse(SimplePostTool.isOn("off")); + } + + @Test + public void testAppendParam() { + assertEquals("http://example.com?foo=bar", SimplePostTool.appendParam("http://example.com", "foo=bar")); + assertEquals("http://example.com/?a=b&foo=bar", SimplePostTool.appendParam("http://example.com/?a=b", "foo=bar")); + } + + 
@Test + public void testAppendUrlPath() throws MalformedURLException { + assertEquals(new URL("http://example.com/a?foo=bar"), SimplePostTool.appendUrlPath(new URL("http://example.com?foo=bar"), "/a")); + } + + @Test + public void testGuessType() { + File f = new File("foo.doc"); + assertEquals("application/msword", SimplePostTool.guessType(f)); + f = new File("foobar"); + assertEquals(null, SimplePostTool.guessType(f)); + } + + @Test + public void testDoFilesMode() { + t_file_auto.recursive = 0; + File dir = getFile("exampledocs"); + int num = t_file_auto.postFiles(new File[] {dir}, 0, null, null); + assertEquals(2, num); + } + + @Test + public void testDoWebMode() { + // Uses mock pageFetcher + t_web.delay = 0; + t_web.recursive = 5; + int num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null); + assertEquals(5, num); + + t_web.recursive = 1; + num = t_web.postWebPages(new String[] {"http://example.com/"}, 0, null); + assertEquals(3, num); + + // Without respecting robots.txt + SimplePostTool.pageFetcher.robotsCache.clear(); + t_web.recursive = 5; + num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null); + assertEquals(6, num); +} + + @Test + public void testRobotsExclusion() throws MalformedURLException { + assertFalse(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/"))); + assertTrue(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/disallowed"))); + assertTrue("There should be two entries parsed from robots.txt", SimplePostTool.pageFetcher.robotsCache.get("example.com").size() == 2); + } + + class MockPageFetcher extends PageFetcher { + HashMap htmlMap = new HashMap(); + HashMap> linkMap = new HashMap>(); + + public MockPageFetcher() throws IOException { + (new SimplePostTool()).super(); + htmlMap.put("http://example.com", "page1page2"); + htmlMap.put("http://example.com/index.html", "page1page2"); + htmlMap.put("http://example.com/page1", ""); + 
htmlMap.put("http://example.com/page1/foo", ""); + htmlMap.put("http://example.com/page1/foo/bar", ""); + htmlMap.put("http://example.com/page2", ""); + htmlMap.put("http://example.com/disallowed", ""); + + Set s = new HashSet(); + s.add(new URL("http://example.com/page1")); + s.add(new URL("http://example.com/page2")); + linkMap.put("http://example.com", s); + linkMap.put("http://example.com/index.html", s); + s = new HashSet(); + s.add(new URL("http://example.com/page1/foo")); + linkMap.put("http://example.com/page1", s); + s = new HashSet(); + s.add(new URL("http://example.com/page1/foo/bar")); + linkMap.put("http://example.com/page1/foo", s); + s = new HashSet(); + s.add(new URL("http://example.com/disallowed")); + linkMap.put("http://example.com/page2", s); + + // Simulate a robots.txt file with comments and a few disallows + StringBuilder sb = new StringBuilder(); + sb.append("# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n"); + sb.append("User-agent: * # match all bots\n"); + sb.append("Disallow: # This is void\n"); + sb.append("Disallow: /disallow # Disallow this path\n"); + sb.append("Disallow: /nonexistingpath # Disallow this path\n"); + this.robotsCache.put("example.com", SimplePostTool.pageFetcher. 
+ parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes("UTF-8")))); + } + + @Override + public PageFetcherResult readPageFromUrl(URL u) { + PageFetcherResult res = (new SimplePostTool()).new PageFetcherResult(); + if (isDisallowedByRobots(u)) { + res.httpStatus = 403; + return res; + } + res.httpStatus = 200; + res.contentType = "text/html"; + try { + res.content = htmlMap.get(u.toString()).getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(); + } + return res; + } + + @Override + public Set getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) { + Set s = linkMap.get(SimplePostTool.normalizeUrlEnding(u.toString())); + if(s == null) + s = new HashSet(); + return s; + } + } +} \ No newline at end of file diff --git a/solr/core/src/test/org/apache/solr/util/TestFastOutputStream.java b/solr/core/src/test/org/apache/solr/util/TestFastOutputStream.java new file mode 100644 index 00000000000..7c17d450f16 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/util/TestFastOutputStream.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.util; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.solr.update.MemOutputStream; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Random; +import java.util.Set; +import java.util.TimeZone; + +public class TestFastOutputStream extends LuceneTestCase { + + Random rand; + byte[] arr; + + public void testRandomWrites() throws Exception { + rand = random(); + + arr = new byte[20000]; + for (int i=0; i buffers = new LinkedList(); + + Random r; + public MemWriter(char[] tempBuffer, Random r) { + super(null, tempBuffer, 0); + this.r = r; + } + + @Override + public void flush(char[] arr, int offset, int len) throws IOException { + if (arr == buf && offset==0 && len==buf.length) { + buffers.add(buf); // steal the buffer + buf = new char[r.nextInt(9000)+1]; + } else if (len > 0) { + char[] newBuf = new char[len]; + System.arraycopy(arr, offset, newBuf, 0, len); + buffers.add(newBuf); + } + } + + @Override + public void flush(String str, int offset, int len) throws IOException { + if (len == 0) return; + buffers.add( str.substring(offset, offset+len).toCharArray() ); + } +} + + + +public class TestFastWriter extends LuceneTestCase { + + Random rand; + char[] arr; + String s; + + public void testRandomWrites() throws Exception { + rand = random(); + + arr = new char[20000]; + for (int i=0; i --> - + + + + + application/json + + + + + application/csv + + - + diff --git a/solr/licenses/ant-1.8.2.jar.sha1 b/solr/licenses/ant-1.8.2.jar.sha1 new file mode 100644 index 00000000000..564db78dfdc --- /dev/null +++ b/solr/licenses/ant-1.8.2.jar.sha1 @@ -0,0 +1 @@ +fc33bf7cd8c5309dd7b81228e8626515ee42efd9 diff --git a/solr/licenses/ant-LICENSE-ASL.txt b/solr/licenses/ant-LICENSE-ASL.txt new file mode 100644 index 00000000000..ab3182e7776 --- /dev/null +++ b/solr/licenses/ant-LICENSE-ASL.txt @@ -0,0 +1,272 @@ +/* + * Apache License + * Version 2.0, January 2004 + * 
http://www.apache.org/licenses/ + * + * TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + * + * 1. Definitions. + * + * "License" shall mean the terms and conditions for use, reproduction, + * and distribution as defined by Sections 1 through 9 of this document. + * + * "Licensor" shall mean the copyright owner or entity authorized by + * the copyright owner that is granting the License. + * + * "Legal Entity" shall mean the union of the acting entity and all + * other entities that control, are controlled by, or are under common + * control with that entity. For the purposes of this definition, + * "control" means (i) the power, direct or indirect, to cause the + * direction or management of such entity, whether by contract or + * otherwise, or (ii) ownership of fifty percent (50%) or more of the + * outstanding shares, or (iii) beneficial ownership of such entity. + * + * "You" (or "Your") shall mean an individual or Legal Entity + * exercising permissions granted by this License. + * + * "Source" form shall mean the preferred form for making modifications, + * including but not limited to software source code, documentation + * source, and configuration files. + * + * "Object" form shall mean any form resulting from mechanical + * transformation or translation of a Source form, including but + * not limited to compiled object code, generated documentation, + * and conversions to other media types. + * + * "Work" shall mean the work of authorship, whether in Source or + * Object form, made available under the License, as indicated by a + * copyright notice that is included in or attached to the work + * (an example is provided in the Appendix below). + * + * "Derivative Works" shall mean any work, whether in Source or Object + * form, that is based on (or derived from) the Work and for which the + * editorial revisions, annotations, elaborations, or other modifications + * represent, as a whole, an original work of authorship. 
For the purposes + * of this License, Derivative Works shall not include works that remain + * separable from, or merely link (or bind by name) to the interfaces of, + * the Work and Derivative Works thereof. + * + * "Contribution" shall mean any work of authorship, including + * the original version of the Work and any modifications or additions + * to that Work or Derivative Works thereof, that is intentionally + * submitted to Licensor for inclusion in the Work by the copyright owner + * or by an individual or Legal Entity authorized to submit on behalf of + * the copyright owner. For the purposes of this definition, "submitted" + * means any form of electronic, verbal, or written communication sent + * to the Licensor or its representatives, including but not limited to + * communication on electronic mailing lists, source code control systems, + * and issue tracking systems that are managed by, or on behalf of, the + * Licensor for the purpose of discussing and improving the Work, but + * excluding communication that is conspicuously marked or otherwise + * designated in writing by the copyright owner as "Not a Contribution." + * + * "Contributor" shall mean Licensor and any individual or Legal Entity + * on behalf of whom a Contribution has been received by Licensor and + * subsequently incorporated within the Work. + * + * 2. Grant of Copyright License. Subject to the terms and conditions of + * this License, each Contributor hereby grants to You a perpetual, + * worldwide, non-exclusive, no-charge, royalty-free, irrevocable + * copyright license to reproduce, prepare Derivative Works of, + * publicly display, publicly perform, sublicense, and distribute the + * Work and such Derivative Works in Source or Object form. + * + * 3. Grant of Patent License. 
Subject to the terms and conditions of + * this License, each Contributor hereby grants to You a perpetual, + * worldwide, non-exclusive, no-charge, royalty-free, irrevocable + * (except as stated in this section) patent license to make, have made, + * use, offer to sell, sell, import, and otherwise transfer the Work, + * where such license applies only to those patent claims licensable + * by such Contributor that are necessarily infringed by their + * Contribution(s) alone or by combination of their Contribution(s) + * with the Work to which such Contribution(s) was submitted. If You + * institute patent litigation against any entity (including a + * cross-claim or counterclaim in a lawsuit) alleging that the Work + * or a Contribution incorporated within the Work constitutes direct + * or contributory patent infringement, then any patent licenses + * granted to You under this License for that Work shall terminate + * as of the date such litigation is filed. + * + * 4. Redistribution. 
You may reproduce and distribute copies of the + * Work or Derivative Works thereof in any medium, with or without + * modifications, and in Source or Object form, provided that You + * meet the following conditions: + * + * (a) You must give any other recipients of the Work or + * Derivative Works a copy of this License; and + * + * (b) You must cause any modified files to carry prominent notices + * stating that You changed the files; and + * + * (c) You must retain, in the Source form of any Derivative Works + * that You distribute, all copyright, patent, trademark, and + * attribution notices from the Source form of the Work, + * excluding those notices that do not pertain to any part of + * the Derivative Works; and + * + * (d) If the Work includes a "NOTICE" text file as part of its + * distribution, then any Derivative Works that You distribute must + * include a readable copy of the attribution notices contained + * within such NOTICE file, excluding those notices that do not + * pertain to any part of the Derivative Works, in at least one + * of the following places: within a NOTICE text file distributed + * as part of the Derivative Works; within the Source form or + * documentation, if provided along with the Derivative Works; or, + * within a display generated by the Derivative Works, if and + * wherever such third-party notices normally appear. The contents + * of the NOTICE file are for informational purposes only and + * do not modify the License. You may add Your own attribution + * notices within Derivative Works that You distribute, alongside + * or as an addendum to the NOTICE text from the Work, provided + * that such additional attribution notices cannot be construed + * as modifying the License. 
+ * + * You may add Your own copyright statement to Your modifications and + * may provide additional or different license terms and conditions + * for use, reproduction, or distribution of Your modifications, or + * for any such Derivative Works as a whole, provided Your use, + * reproduction, and distribution of the Work otherwise complies with + * the conditions stated in this License. + * + * 5. Submission of Contributions. Unless You explicitly state otherwise, + * any Contribution intentionally submitted for inclusion in the Work + * by You to the Licensor shall be under the terms and conditions of + * this License, without any additional terms or conditions. + * Notwithstanding the above, nothing herein shall supersede or modify + * the terms of any separate license agreement you may have executed + * with Licensor regarding such Contributions. + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor, + * except as required for reasonable and customary use in describing the + * origin of the Work and reproducing the content of the NOTICE file. + * + * 7. Disclaimer of Warranty. Unless required by applicable law or + * agreed to in writing, Licensor provides the Work (and each + * Contributor provides its Contributions) on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied, including, without limitation, any warranties or conditions + * of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + * PARTICULAR PURPOSE. You are solely responsible for determining the + * appropriateness of using or redistributing the Work and assume any + * risks associated with Your exercise of permissions under this License. + * + * 8. Limitation of Liability. 
In no event and under no legal theory, + * whether in tort (including negligence), contract, or otherwise, + * unless required by applicable law (such as deliberate and grossly + * negligent acts) or agreed to in writing, shall any Contributor be + * liable to You for damages, including any direct, indirect, special, + * incidental, or consequential damages of any character arising as a + * result of this License or out of the use or inability to use the + * Work (including but not limited to damages for loss of goodwill, + * work stoppage, computer failure or malfunction, or any and all + * other commercial damages or losses), even if such Contributor + * has been advised of the possibility of such damages. + * + * 9. Accepting Warranty or Additional Liability. While redistributing + * the Work or Derivative Works thereof, You may choose to offer, + * and charge a fee for, acceptance of support, warranty, indemnity, + * or other liability obligations and/or rights consistent with this + * License. However, in accepting such obligations, You may act only + * on Your own behalf and on Your sole responsibility, not on behalf + * of any other Contributor, and only if You agree to indemnify, + * defend, and hold each Contributor harmless for any liability + * incurred by, or claims asserted against, such Contributor by reason + * of your accepting any such warranty or additional liability. + * + * END OF TERMS AND CONDITIONS + * + * APPENDIX: How to apply the Apache License to your work. + * + * To apply the Apache License to your work, attach the following + * boilerplate notice, with the fields enclosed by brackets "[]" + * replaced with your own identifying information. (Don't include + * the brackets!) The text should be enclosed in the appropriate + * comment syntax for the file format. 
We also recommend that a + * file or class name and description of purpose be included on the + * same "printed page" as the copyright notice for easier + * identification within third-party archives. + * + * Copyright [yyyy] [name of copyright owner] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +W3C® SOFTWARE NOTICE AND LICENSE +http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 + +This work (and included software, documentation such as READMEs, or other +related items) is being provided by the copyright holders under the following +license. By obtaining, using and/or copying this work, you (the licensee) agree +that you have read, understood, and will comply with the following terms and +conditions. + +Permission to copy, modify, and distribute this software and its documentation, +with or without modification, for any purpose and without fee or royalty is +hereby granted, provided that you include the following on ALL copies of the +software and documentation or portions thereof, including modifications: + + 1. The full text of this NOTICE in a location viewable to users of the + redistributed or derivative work. + 2. Any pre-existing intellectual property disclaimers, notices, or terms + and conditions. If none exist, the W3C Software Short Notice should be + included (hypertext is preferred, text is permitted) within the body + of any redistributed or derivative code. + 3. 
Notice of any changes or modifications to the files, including the date + changes were made. (We recommend you provide URIs to the location from + which the code is derived.) + +THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE +NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT +THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY +PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. + +COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION. + +The name and trademarks of copyright holders may NOT be used in advertising or +publicity pertaining to the software without specific, written prior permission. +Title to copyright in this software and any associated documentation will at +all times remain with copyright holders. + +____________________________________ + +This formulation of W3C's notice and license became active on December 31 2002. +This version removes the copyright ownership notice such that this license can +be used with materials other than those owned by the W3C, reflects that ERCIM +is now a host of the W3C, includes references to this specific dated version of +the license, and removes the ambiguous grant of "use". Otherwise, this version +is the same as the previous version and is written so as to preserve the Free +Software Foundation's assessment of GPL compatibility and OSI's certification +under the Open Source Definition. Please see our Copyright FAQ for common +questions about using materials from our site, including specific terms and +conditions for packages like libwww, Amaya, and Jigsaw. Other questions about +this notice can be directed to site-policy@w3.org. 
+ +Joseph Reagle + +This license came from: http://www.megginson.com/SAX/copying.html + However please note future versions of SAX may be covered + under http://saxproject.org/?selected=pd + +SAX2 is Free! + +I hereby abandon any property rights to SAX 2.0 (the Simple API for +XML), and release all of the SAX 2.0 source code, compiled code, and +documentation contained in this distribution into the Public Domain. +SAX comes with NO WARRANTY or guarantee of fitness for any +purpose. + +David Megginson, david@megginson.com +2000-05-05 diff --git a/solr/licenses/ant-NOTICE.txt b/solr/licenses/ant-NOTICE.txt new file mode 100644 index 00000000000..4c88cc6659b --- /dev/null +++ b/solr/licenses/ant-NOTICE.txt @@ -0,0 +1,26 @@ + ========================================================================= + == NOTICE file corresponding to the section 4 d of == + == the Apache License, Version 2.0, == + == in this case for the Apache Ant distribution. == + ========================================================================= + + Apache Ant + Copyright 1999-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + This product includes also software developed by : + - the W3C consortium (http://www.w3c.org) , + - the SAX project (http://www.saxproject.org) + + The <sync> task is based on code Copyright (c) 2002, Landmark + Graphics Corp that has been kindly donated to the Apache Software + Foundation. + + Portions of this software were originally based on the following: + - software copyright (c) 1999, IBM Corporation., http://www.ibm.com. + - software copyright (c) 1999, Sun Microsystems., http://www.sun.com. + - voluntary contributions made by Paul Eng on behalf of the + Apache Software Foundation that were originally developed at iClick, Inc., + software copyright (c) 1999.
diff --git a/solr/licenses/ant-junit-1.8.2.jar.sha1 b/solr/licenses/ant-junit-1.8.2.jar.sha1 new file mode 100644 index 00000000000..b079f25a851 --- /dev/null +++ b/solr/licenses/ant-junit-1.8.2.jar.sha1 @@ -0,0 +1 @@ +1653e85a2710d59edef1fcdd899a35f2c45324b3 diff --git a/solr/licenses/ant-junit-LICENSE-ASL.txt b/solr/licenses/ant-junit-LICENSE-ASL.txt new file mode 100644 index 00000000000..ab3182e7776 --- /dev/null +++ b/solr/licenses/ant-junit-LICENSE-ASL.txt @@ -0,0 +1,272 @@ +/* + * Apache License + * Version 2.0, January 2004 + * http://www.apache.org/licenses/ + * + * TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + * + * 1. Definitions. + * + * "License" shall mean the terms and conditions for use, reproduction, + * and distribution as defined by Sections 1 through 9 of this document. + * + * "Licensor" shall mean the copyright owner or entity authorized by + * the copyright owner that is granting the License. + * + * "Legal Entity" shall mean the union of the acting entity and all + * other entities that control, are controlled by, or are under common + * control with that entity. For the purposes of this definition, + * "control" means (i) the power, direct or indirect, to cause the + * direction or management of such entity, whether by contract or + * otherwise, or (ii) ownership of fifty percent (50%) or more of the + * outstanding shares, or (iii) beneficial ownership of such entity. + * + * "You" (or "Your") shall mean an individual or Legal Entity + * exercising permissions granted by this License. + * + * "Source" form shall mean the preferred form for making modifications, + * including but not limited to software source code, documentation + * source, and configuration files. + * + * "Object" form shall mean any form resulting from mechanical + * transformation or translation of a Source form, including but + * not limited to compiled object code, generated documentation, + * and conversions to other media types. 
+ * + * "Work" shall mean the work of authorship, whether in Source or + * Object form, made available under the License, as indicated by a + * copyright notice that is included in or attached to the work + * (an example is provided in the Appendix below). + * + * "Derivative Works" shall mean any work, whether in Source or Object + * form, that is based on (or derived from) the Work and for which the + * editorial revisions, annotations, elaborations, or other modifications + * represent, as a whole, an original work of authorship. For the purposes + * of this License, Derivative Works shall not include works that remain + * separable from, or merely link (or bind by name) to the interfaces of, + * the Work and Derivative Works thereof. + * + * "Contribution" shall mean any work of authorship, including + * the original version of the Work and any modifications or additions + * to that Work or Derivative Works thereof, that is intentionally + * submitted to Licensor for inclusion in the Work by the copyright owner + * or by an individual or Legal Entity authorized to submit on behalf of + * the copyright owner. For the purposes of this definition, "submitted" + * means any form of electronic, verbal, or written communication sent + * to the Licensor or its representatives, including but not limited to + * communication on electronic mailing lists, source code control systems, + * and issue tracking systems that are managed by, or on behalf of, the + * Licensor for the purpose of discussing and improving the Work, but + * excluding communication that is conspicuously marked or otherwise + * designated in writing by the copyright owner as "Not a Contribution." + * + * "Contributor" shall mean Licensor and any individual or Legal Entity + * on behalf of whom a Contribution has been received by Licensor and + * subsequently incorporated within the Work. + * + * 2. Grant of Copyright License. 
Subject to the terms and conditions of + * this License, each Contributor hereby grants to You a perpetual, + * worldwide, non-exclusive, no-charge, royalty-free, irrevocable + * copyright license to reproduce, prepare Derivative Works of, + * publicly display, publicly perform, sublicense, and distribute the + * Work and such Derivative Works in Source or Object form. + * + * 3. Grant of Patent License. Subject to the terms and conditions of + * this License, each Contributor hereby grants to You a perpetual, + * worldwide, non-exclusive, no-charge, royalty-free, irrevocable + * (except as stated in this section) patent license to make, have made, + * use, offer to sell, sell, import, and otherwise transfer the Work, + * where such license applies only to those patent claims licensable + * by such Contributor that are necessarily infringed by their + * Contribution(s) alone or by combination of their Contribution(s) + * with the Work to which such Contribution(s) was submitted. If You + * institute patent litigation against any entity (including a + * cross-claim or counterclaim in a lawsuit) alleging that the Work + * or a Contribution incorporated within the Work constitutes direct + * or contributory patent infringement, then any patent licenses + * granted to You under this License for that Work shall terminate + * as of the date such litigation is filed. + * + * 4. Redistribution. 
You may reproduce and distribute copies of the + * Work or Derivative Works thereof in any medium, with or without + * modifications, and in Source or Object form, provided that You + * meet the following conditions: + * + * (a) You must give any other recipients of the Work or + * Derivative Works a copy of this License; and + * + * (b) You must cause any modified files to carry prominent notices + * stating that You changed the files; and + * + * (c) You must retain, in the Source form of any Derivative Works + * that You distribute, all copyright, patent, trademark, and + * attribution notices from the Source form of the Work, + * excluding those notices that do not pertain to any part of + * the Derivative Works; and + * + * (d) If the Work includes a "NOTICE" text file as part of its + * distribution, then any Derivative Works that You distribute must + * include a readable copy of the attribution notices contained + * within such NOTICE file, excluding those notices that do not + * pertain to any part of the Derivative Works, in at least one + * of the following places: within a NOTICE text file distributed + * as part of the Derivative Works; within the Source form or + * documentation, if provided along with the Derivative Works; or, + * within a display generated by the Derivative Works, if and + * wherever such third-party notices normally appear. The contents + * of the NOTICE file are for informational purposes only and + * do not modify the License. You may add Your own attribution + * notices within Derivative Works that You distribute, alongside + * or as an addendum to the NOTICE text from the Work, provided + * that such additional attribution notices cannot be construed + * as modifying the License. 
+ * + * You may add Your own copyright statement to Your modifications and + * may provide additional or different license terms and conditions + * for use, reproduction, or distribution of Your modifications, or + * for any such Derivative Works as a whole, provided Your use, + * reproduction, and distribution of the Work otherwise complies with + * the conditions stated in this License. + * + * 5. Submission of Contributions. Unless You explicitly state otherwise, + * any Contribution intentionally submitted for inclusion in the Work + * by You to the Licensor shall be under the terms and conditions of + * this License, without any additional terms or conditions. + * Notwithstanding the above, nothing herein shall supersede or modify + * the terms of any separate license agreement you may have executed + * with Licensor regarding such Contributions. + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor, + * except as required for reasonable and customary use in describing the + * origin of the Work and reproducing the content of the NOTICE file. + * + * 7. Disclaimer of Warranty. Unless required by applicable law or + * agreed to in writing, Licensor provides the Work (and each + * Contributor provides its Contributions) on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied, including, without limitation, any warranties or conditions + * of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + * PARTICULAR PURPOSE. You are solely responsible for determining the + * appropriateness of using or redistributing the Work and assume any + * risks associated with Your exercise of permissions under this License. + * + * 8. Limitation of Liability. 
In no event and under no legal theory, + * whether in tort (including negligence), contract, or otherwise, + * unless required by applicable law (such as deliberate and grossly + * negligent acts) or agreed to in writing, shall any Contributor be + * liable to You for damages, including any direct, indirect, special, + * incidental, or consequential damages of any character arising as a + * result of this License or out of the use or inability to use the + * Work (including but not limited to damages for loss of goodwill, + * work stoppage, computer failure or malfunction, or any and all + * other commercial damages or losses), even if such Contributor + * has been advised of the possibility of such damages. + * + * 9. Accepting Warranty or Additional Liability. While redistributing + * the Work or Derivative Works thereof, You may choose to offer, + * and charge a fee for, acceptance of support, warranty, indemnity, + * or other liability obligations and/or rights consistent with this + * License. However, in accepting such obligations, You may act only + * on Your own behalf and on Your sole responsibility, not on behalf + * of any other Contributor, and only if You agree to indemnify, + * defend, and hold each Contributor harmless for any liability + * incurred by, or claims asserted against, such Contributor by reason + * of your accepting any such warranty or additional liability. + * + * END OF TERMS AND CONDITIONS + * + * APPENDIX: How to apply the Apache License to your work. + * + * To apply the Apache License to your work, attach the following + * boilerplate notice, with the fields enclosed by brackets "[]" + * replaced with your own identifying information. (Don't include + * the brackets!) The text should be enclosed in the appropriate + * comment syntax for the file format. 
We also recommend that a + * file or class name and description of purpose be included on the + * same "printed page" as the copyright notice for easier + * identification within third-party archives. + * + * Copyright [yyyy] [name of copyright owner] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +W3C® SOFTWARE NOTICE AND LICENSE +http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 + +This work (and included software, documentation such as READMEs, or other +related items) is being provided by the copyright holders under the following +license. By obtaining, using and/or copying this work, you (the licensee) agree +that you have read, understood, and will comply with the following terms and +conditions. + +Permission to copy, modify, and distribute this software and its documentation, +with or without modification, for any purpose and without fee or royalty is +hereby granted, provided that you include the following on ALL copies of the +software and documentation or portions thereof, including modifications: + + 1. The full text of this NOTICE in a location viewable to users of the + redistributed or derivative work. + 2. Any pre-existing intellectual property disclaimers, notices, or terms + and conditions. If none exist, the W3C Software Short Notice should be + included (hypertext is preferred, text is permitted) within the body + of any redistributed or derivative code. + 3. 
Notice of any changes or modifications to the files, including the date + changes were made. (We recommend you provide URIs to the location from + which the code is derived.) + +THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE +NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT +THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY +PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. + +COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION. + +The name and trademarks of copyright holders may NOT be used in advertising or +publicity pertaining to the software without specific, written prior permission. +Title to copyright in this software and any associated documentation will at +all times remain with copyright holders. + +____________________________________ + +This formulation of W3C's notice and license became active on December 31 2002. +This version removes the copyright ownership notice such that this license can +be used with materials other than those owned by the W3C, reflects that ERCIM +is now a host of the W3C, includes references to this specific dated version of +the license, and removes the ambiguous grant of "use". Otherwise, this version +is the same as the previous version and is written so as to preserve the Free +Software Foundation's assessment of GPL compatibility and OSI's certification +under the Open Source Definition. Please see our Copyright FAQ for common +questions about using materials from our site, including specific terms and +conditions for packages like libwww, Amaya, and Jigsaw. Other questions about +this notice can be directed to site-policy@w3.org. 
+ +Joseph Reagle + +This license came from: http://www.megginson.com/SAX/copying.html + However please note future versions of SAX may be covered + under http://saxproject.org/?selected=pd + +SAX2 is Free! + +I hereby abandon any property rights to SAX 2.0 (the Simple API for +XML), and release all of the SAX 2.0 source code, compiled code, and +documentation contained in this distribution into the Public Domain. +SAX comes with NO WARRANTY or guarantee of fitness for any +purpose. + +David Megginson, david@megginson.com +2000-05-05 diff --git a/solr/licenses/ant-junit-NOTICE.txt b/solr/licenses/ant-junit-NOTICE.txt new file mode 100644 index 00000000000..203ed03c8b6 --- /dev/null +++ b/solr/licenses/ant-junit-NOTICE.txt @@ -0,0 +1,6 @@ +Apache Ant +Copyright 1999-2008 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). + diff --git a/solr/licenses/apache-mime4j-core-0.7.2.jar.sha1 b/solr/licenses/apache-mime4j-core-0.7.2.jar.sha1 new file mode 100644 index 00000000000..f5a55e605e2 --- /dev/null +++ b/solr/licenses/apache-mime4j-core-0.7.2.jar.sha1 @@ -0,0 +1 @@ +a81264fe0265ebe8fd1d8128aad06dc320de6eef diff --git a/solr/licenses/apache-mime4j-core-0.7.jar.sha1 b/solr/licenses/apache-mime4j-core-0.7.jar.sha1 deleted file mode 100644 index cba20aa6805..00000000000 --- a/solr/licenses/apache-mime4j-core-0.7.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a06e8c6d9fa4063df541427686e9f070377b7b8e diff --git a/solr/licenses/apache-mime4j-dom-0.7.2.jar.sha1 b/solr/licenses/apache-mime4j-dom-0.7.2.jar.sha1 new file mode 100644 index 00000000000..9de5694dc6f --- /dev/null +++ b/solr/licenses/apache-mime4j-dom-0.7.2.jar.sha1 @@ -0,0 +1 @@ +1c289aa264548a0a1f1b43685a9cb2ab23f67287 diff --git a/solr/licenses/apache-mime4j-dom-0.7.jar.sha1 b/solr/licenses/apache-mime4j-dom-0.7.jar.sha1 deleted file mode 100644 index 6f39664c03a..00000000000 --- a/solr/licenses/apache-mime4j-dom-0.7.jar.sha1 +++ /dev/null 
@@ -1 +0,0 @@ -f0cbe6fd6f8d21843e6646c2d6490a58f60078a9 diff --git a/solr/licenses/commons-compress-1.3.jar.sha1 b/solr/licenses/commons-compress-1.3.jar.sha1 deleted file mode 100644 index 548906c8b33..00000000000 --- a/solr/licenses/commons-compress-1.3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -613a08bc72e2e9a43c9f87b9ff8352b47e18e3ae diff --git a/solr/licenses/commons-compress-1.4.1.jar.sha1 b/solr/licenses/commons-compress-1.4.1.jar.sha1 new file mode 100644 index 00000000000..f64e67e42a9 --- /dev/null +++ b/solr/licenses/commons-compress-1.4.1.jar.sha1 @@ -0,0 +1 @@ +b02e84a993d88568417536240e970c4b809126fd diff --git a/solr/licenses/commons-compress-NOTICE.txt b/solr/licenses/commons-compress-NOTICE.txt index 1a1f52c9016..07baa98630b 100644 --- a/solr/licenses/commons-compress-NOTICE.txt +++ b/solr/licenses/commons-compress-NOTICE.txt @@ -1,5 +1,5 @@ Apache Commons Compress -Copyright 2002-2011 The Apache Software Foundation +Copyright 2002-2012 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). 
diff --git a/solr/licenses/fontbox-1.6.0.jar.sha1 b/solr/licenses/fontbox-1.6.0.jar.sha1 deleted file mode 100644 index 6beed5764a6..00000000000 --- a/solr/licenses/fontbox-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c59ede61204faa09c407a6b1c64a6e21d6f57548 diff --git a/solr/licenses/fontbox-1.7.0.jar.sha1 b/solr/licenses/fontbox-1.7.0.jar.sha1 new file mode 100644 index 00000000000..8be456d1592 --- /dev/null +++ b/solr/licenses/fontbox-1.7.0.jar.sha1 @@ -0,0 +1 @@ +a37220d35b0c673a9543b4853152b57fb77ac513 diff --git a/solr/licenses/fontbox-NOTICE.txt b/solr/licenses/fontbox-NOTICE.txt index effa7efcb83..a2e87e5ff8c 100644 --- a/solr/licenses/fontbox-NOTICE.txt +++ b/solr/licenses/fontbox-NOTICE.txt @@ -1,6 +1,6 @@ Apache FontBox -Copyright 2008-2010 The Apache Software Foundation +Copyright 2008-2012 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/solr/licenses/isoparser-1.0-RC-1.jar.sha1 b/solr/licenses/isoparser-1.0-RC-1.jar.sha1 new file mode 100644 index 00000000000..cdf30714509 --- /dev/null +++ b/solr/licenses/isoparser-1.0-RC-1.jar.sha1 @@ -0,0 +1 @@ +4a5768b1070b9488a433362d736720fd7a7b264f diff --git a/solr/licenses/isoparser-LICENSE-ASL.txt b/solr/licenses/isoparser-1.0-RC-LICENSE-ASL.txt similarity index 100% rename from solr/licenses/isoparser-LICENSE-ASL.txt rename to solr/licenses/isoparser-1.0-RC-LICENSE-ASL.txt diff --git a/solr/licenses/isoparser-NOTICE.txt b/solr/licenses/isoparser-1.0-RC-NOTICE.txt similarity index 87% rename from solr/licenses/isoparser-NOTICE.txt rename to solr/licenses/isoparser-1.0-RC-NOTICE.txt index 21d5546ffb9..4e1b1d9b981 100644 --- a/solr/licenses/isoparser-NOTICE.txt +++ b/solr/licenses/isoparser-1.0-RC-NOTICE.txt @@ -19,8 +19,5 @@ The Apache Software Foundation (http://www.apache.org/). 
This product includes software (Hex Encoder extracted from commons-codec) developed by The Apache Software Foundation (http://www.apache.org/). -This product includes software (JHexEditor) developed by -Germán Laullón (http://www.apache.org/). - This product includes software (package com.googlecode.mp4parser.h264) developed by -Stanislav Vitvitskiy and originally licensed under MIT license (http://www.opensource.org/licenses/mit-license.php) +Stanislav Vitvitskiy and originally licensed under MIT license (http://www.opensource.org/licenses/mit-license.php) \ No newline at end of file diff --git a/solr/licenses/isoparser-1.0-beta-5.jar.sha1 b/solr/licenses/isoparser-1.0-beta-5.jar.sha1 deleted file mode 100644 index 752abcd2c1b..00000000000 --- a/solr/licenses/isoparser-1.0-beta-5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -587a6e3f9f6258838b05a7033b16699841a957d5 diff --git a/solr/licenses/javassist-3.6.0.GA.jar.sha1 b/solr/licenses/javassist-3.6.0.GA.jar.sha1 deleted file mode 100644 index 6e23d92d697..00000000000 --- a/solr/licenses/javassist-3.6.0.GA.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -231ae32c1b165696970362bea2ff022ce268f545 diff --git a/solr/licenses/jempbox-1.6.0.jar.sha1 b/solr/licenses/jempbox-1.6.0.jar.sha1 deleted file mode 100644 index 2ad507e0637..00000000000 --- a/solr/licenses/jempbox-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b4c612d88a2a86e540edbf1125b1f095513cd65e diff --git a/solr/licenses/jempbox-1.7.0.jar.sha1 b/solr/licenses/jempbox-1.7.0.jar.sha1 new file mode 100644 index 00000000000..22082461164 --- /dev/null +++ b/solr/licenses/jempbox-1.7.0.jar.sha1 @@ -0,0 +1 @@ +d2f3d15fa0182e16e7f04fd37d2003dbd22d3e37 diff --git a/solr/licenses/jempbox-NOTICE.txt b/solr/licenses/jempbox-NOTICE.txt index 4e39ec478bf..a8affe37702 100644 --- a/solr/licenses/jempbox-NOTICE.txt +++ b/solr/licenses/jempbox-NOTICE.txt @@ -1,6 +1,6 @@ Apache JempBox -Copyright 2008-2010 The Apache Software Foundation +Copyright 2008-2012 The Apache Software Foundation This 
product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/solr/licenses/junit-4.10.jar.sha1 b/solr/licenses/junit-4.10.jar.sha1 new file mode 100644 index 00000000000..875e26cee22 --- /dev/null +++ b/solr/licenses/junit-4.10.jar.sha1 @@ -0,0 +1 @@ +e4f1766ce7404a08f45d859fb9c226fc9e41a861 diff --git a/solr/licenses/junit-LICENSE-CPL.txt b/solr/licenses/junit-LICENSE-CPL.txt new file mode 100644 index 00000000000..4efdc7b204b --- /dev/null +++ b/solr/licenses/junit-LICENSE-CPL.txt @@ -0,0 +1,88 @@ +Common Public License - v 1.0 + + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + + +1. DEFINITIONS + +"Contribution" means: + +a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and +b) in the case of each subsequent Contributor: +i) changes to the Program, and +ii) additions to the Program; +where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program. + +"Contributor" means any person or entity that distributes the Program. + + +"Licensed Patents " mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program. + + +"Program" means the Contributions distributed in accordance with this Agreement. 
+ + +"Recipient" means anyone who receives the Program under this Agreement, including all Contributors. + + +2. GRANT OF RIGHTS + +a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form. +b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. +c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program. 
+d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that: + +a) it complies with the terms and conditions of this Agreement; and +b) its license agreement: +i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; +ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; +iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and +iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. +When the Program is made available in source code form: + +a) it must be made available under this Agreement; and +b) a copy of this Agreement must be included with each copy of the Program. + +Contributors may not remove or alter any copyright notices contained within the Program. + + +Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution. + + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. 
While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense. + + +For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages. + + +5. 
NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations. + + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. + + +If Recipient institutes patent litigation against a Contributor with respect to a patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then any patent licenses granted by that Contributor to such Recipient under this Agreement shall terminate as of the date such litigation is filed. 
In addition, if Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed. + + +All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive. + + +Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. IBM is the initial Agreement Steward. IBM may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. 
Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved. + + +This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation. diff --git a/solr/licenses/junit-NOTICE.txt b/solr/licenses/junit-NOTICE.txt new file mode 100644 index 00000000000..f9796ea6468 --- /dev/null +++ b/solr/licenses/junit-NOTICE.txt @@ -0,0 +1,2 @@ +JUnit (under lib/junit-4.10.jar) is licensed under the Common Public License v. 1.0 +See http://junit.sourceforge.net/cpl-v10.html \ No newline at end of file diff --git a/solr/licenses/junit4-ant-2.0.0.rc5.jar.sha1 b/solr/licenses/junit4-ant-2.0.0.rc5.jar.sha1 new file mode 100644 index 00000000000..2f0a50fd67b --- /dev/null +++ b/solr/licenses/junit4-ant-2.0.0.rc5.jar.sha1 @@ -0,0 +1 @@ +2b08ce9fc1269cbbdbb647ec651d64d501d8c071 \ No newline at end of file diff --git a/solr/licenses/junit4-ant-LICENSE-ASL.txt b/solr/licenses/junit4-ant-LICENSE-ASL.txt new file mode 100755 index 00000000000..7a4a3ea2424 --- /dev/null +++ b/solr/licenses/junit4-ant-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/solr/licenses/junit4-ant-NOTICE.txt b/solr/licenses/junit4-ant-NOTICE.txt new file mode 100755 index 00000000000..3c321aa2516 --- /dev/null +++ b/solr/licenses/junit4-ant-NOTICE.txt @@ -0,0 +1,12 @@ + +JUnit4, parallel JUnit execution for ANT +Copyright 2011-2012 Carrot Search s.c. +http://labs.carrotsearch.com/randomizedtesting.html + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ +This product includes asm (asmlib), BSD license +This product includes Google Guava, ASL license +This product includes simple-xml, ASL license +This product includes Google GSON, ASL license diff --git a/solr/licenses/juniversalchardet-1.0.3.jar.sha1 b/solr/licenses/juniversalchardet-1.0.3.jar.sha1 new file mode 100644 index 00000000000..6b06952678f --- /dev/null +++ b/solr/licenses/juniversalchardet-1.0.3.jar.sha1 @@ -0,0 +1 @@ +cd49678784c46aa8789c060538e0154013bb421b diff --git a/solr/licenses/juniversalchardet-LICENSE-MPL.txt b/solr/licenses/juniversalchardet-LICENSE-MPL.txt new file mode 100644 index 00000000000..06f965147a8 --- /dev/null +++ b/solr/licenses/juniversalchardet-LICENSE-MPL.txt @@ -0,0 +1,469 @@ + MOZILLA PUBLIC LICENSE + Version 1.1 + + --------------- + +1. Definitions. + + 1.0.1. "Commercial Use" means distribution or otherwise making the + Covered Code available to a third party. + + 1.1. "Contributor" means each entity that creates or contributes to + the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Code, prior Modifications used by a Contributor, and the Modifications + made by that particular Contributor. + + 1.3. "Covered Code" means the Original Code or Modifications or the + combination of the Original Code and Modifications, in each case + including portions thereof. + + 1.4. "Electronic Distribution Mechanism" means a mechanism generally + accepted in the software development community for the electronic + transfer of data. + + 1.5. "Executable" means Covered Code in any form other than Source + Code. + + 1.6. "Initial Developer" means the individual or entity identified + as the Initial Developer in the Source Code notice required by Exhibit + A. + + 1.7. "Larger Work" means a work which combines Covered Code or + portions thereof with code not governed by the terms of this License. + + 1.8. "License" means this document. + + 1.8.1. 
"Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed herein. + + 1.9. "Modifications" means any addition to or deletion from the + substance or structure of either the Original Code or any previous + Modifications. When Covered Code is released as a series of files, a + Modification is: + A. Any addition to or deletion from the contents of a file + containing Original Code or previous Modifications. + + B. Any new file that contains any part of the Original Code or + previous Modifications. + + 1.10. "Original Code" means Source Code of computer software code + which is described in the Source Code notice required by Exhibit A as + Original Code, and which, at the time of its release under this + License is not already Covered Code governed by this License. + + 1.10.1. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, process, + and apparatus claims, in any patent Licensable by grantor. + + 1.11. "Source Code" means the preferred form of the Covered Code for + making modifications to it, including all modules it contains, plus + any associated interface definition files, scripts used to control + compilation and installation of an Executable, or source code + differential comparisons against either the Original Code or another + well known, available Covered Code of the Contributor's choice. The + Source Code can be in a compressed or archival form, provided the + appropriate decompression or de-archiving software is widely available + for no charge. + + 1.12. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms of, this + License or a future version of this License issued under Section 6.1. 
+ For legal entities, "You" includes any entity which controls, is + controlled by, or is under common control with You. For purposes of + this definition, "control" means (a) the power, direct or indirect, + to cause the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty percent + (50%) of the outstanding shares or beneficial ownership of such + entity. + +2. Source Code License. + + 2.1. The Initial Developer Grant. + The Initial Developer hereby grants You a world-wide, royalty-free, + non-exclusive license, subject to third party intellectual property + claims: + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer to use, reproduce, + modify, display, perform, sublicense and distribute the Original + Code (or portions thereof) with or without Modifications, and/or + as part of a Larger Work; and + + (b) under Patents Claims infringed by the making, using or + selling of Original Code, to make, have made, use, practice, + sell, and offer for sale, and/or otherwise dispose of the + Original Code (or portions thereof). + + (c) the licenses granted in this Section 2.1(a) and (b) are + effective on the date Initial Developer first distributes + Original Code under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: 1) for code that You delete from the Original Code; 2) + separate from the Original Code; or 3) for infringements caused + by: i) the modification of the Original Code or ii) the + combination of the Original Code with other software or devices. + + 2.2. Contributor Grant. 
+ Subject to third party intellectual property claims, each Contributor + hereby grants You a world-wide, royalty-free, non-exclusive license + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor, to use, reproduce, modify, + display, perform, sublicense and distribute the Modifications + created by such Contributor (or portions thereof) either on an + unmodified basis, with other Modifications, as Covered Code + and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either alone + and/or in combination with its Contributor Version (or portions + of such combination), to make, use, sell, offer for sale, have + made, and/or otherwise dispose of: 1) Modifications made by that + Contributor (or portions thereof); and 2) the combination of + Modifications made by that Contributor with its Contributor + Version (or portions of such combination). + + (c) the licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first makes Commercial Use of + the Covered Code. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: 1) for any code that Contributor has deleted from the + Contributor Version; 2) separate from the Contributor Version; + 3) for infringements caused by: i) third party modifications of + Contributor Version or ii) the combination of Modifications made + by that Contributor with other software (except as part of the + Contributor Version) or other devices; or 4) under Patent Claims + infringed by Covered Code in the absence of Modifications made by + that Contributor. + +3. Distribution Obligations. + + 3.1. Application of License. + The Modifications which You create or to which You contribute are + governed by the terms of this License, including without limitation + Section 2.2. 
The Source Code version of Covered Code may be + distributed only under the terms of this License or a future version + of this License released under Section 6.1, and You must include a + copy of this License with every copy of the Source Code You + distribute. You may not offer or impose any terms on any Source Code + version that alters or restricts the applicable version of this + License or the recipients' rights hereunder. However, You may include + an additional document offering the additional rights described in + Section 3.5. + + 3.2. Availability of Source Code. + Any Modification which You create or to which You contribute must be + made available in Source Code form under the terms of this License + either on the same media as an Executable version or via an accepted + Electronic Distribution Mechanism to anyone to whom you made an + Executable version available; and if made available via Electronic + Distribution Mechanism, must remain available for at least twelve (12) + months after the date it initially became available, or at least six + (6) months after a subsequent version of that particular Modification + has been made available to such recipients. You are responsible for + ensuring that the Source Code version remains available even if the + Electronic Distribution Mechanism is maintained by a third party. + + 3.3. Description of Modifications. + You must cause all Covered Code to which You contribute to contain a + file documenting the changes You made to create that Covered Code and + the date of any change. You must include a prominent statement that + the Modification is derived, directly or indirectly, from Original + Code provided by the Initial Developer and including the name of the + Initial Developer in (a) the Source Code, and (b) in any notice in an + Executable version or related documentation in which You describe the + origin or ownership of the Covered Code. + + 3.4. Intellectual Property Matters + (a) Third Party Claims. 
+ If Contributor has knowledge that a license under a third party's + intellectual property rights is required to exercise the rights + granted by such Contributor under Sections 2.1 or 2.2, + Contributor must include a text file with the Source Code + distribution titled "LEGAL" which describes the claim and the + party making the claim in sufficient detail that a recipient will + know whom to contact. If Contributor obtains such knowledge after + the Modification is made available as described in Section 3.2, + Contributor shall promptly modify the LEGAL file in all copies + Contributor makes available thereafter and shall take other steps + (such as notifying appropriate mailing lists or newsgroups) + reasonably calculated to inform those who received the Covered + Code that new knowledge has been obtained. + + (b) Contributor APIs. + If Contributor's Modifications include an application programming + interface and Contributor has knowledge of patent licenses which + are reasonably necessary to implement that API, Contributor must + also include this information in the LEGAL file. + + (c) Representations. + Contributor represents that, except as disclosed pursuant to + Section 3.4(a) above, Contributor believes that Contributor's + Modifications are Contributor's original creation(s) and/or + Contributor has sufficient rights to grant the rights conveyed by + this License. + + 3.5. Required Notices. + You must duplicate the notice in Exhibit A in each file of the Source + Code. If it is not possible to put such notice in a particular Source + Code file due to its structure, then You must include such notice in a + location (such as a relevant directory) where a user would be likely + to look for such a notice. If You created one or more Modification(s) + You may add your name as a Contributor to the notice described in + Exhibit A. 
You must also duplicate this License in any documentation + for the Source Code where You describe recipients' rights or ownership + rights relating to Covered Code. You may choose to offer, and to + charge a fee for, warranty, support, indemnity or liability + obligations to one or more recipients of Covered Code. However, You + may do so only on Your own behalf, and not on behalf of the Initial + Developer or any Contributor. You must make it absolutely clear than + any such warranty, support, indemnity or liability obligation is + offered by You alone, and You hereby agree to indemnify the Initial + Developer and every Contributor for any liability incurred by the + Initial Developer or such Contributor as a result of warranty, + support, indemnity or liability terms You offer. + + 3.6. Distribution of Executable Versions. + You may distribute Covered Code in Executable form only if the + requirements of Section 3.1-3.5 have been met for that Covered Code, + and if You include a notice stating that the Source Code version of + the Covered Code is available under the terms of this License, + including a description of how and where You have fulfilled the + obligations of Section 3.2. The notice must be conspicuously included + in any notice in an Executable version, related documentation or + collateral in which You describe recipients' rights relating to the + Covered Code. You may distribute the Executable version of Covered + Code or ownership rights under a license of Your choice, which may + contain terms different from this License, provided that You are in + compliance with the terms of this License and that the license for the + Executable version does not attempt to limit or alter the recipient's + rights in the Source Code version from the rights set forth in this + License. 
If You distribute the Executable version under a different + license You must make it absolutely clear that any terms which differ + from this License are offered by You alone, not by the Initial + Developer or any Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred by + the Initial Developer or such Contributor as a result of any such + terms You offer. + + 3.7. Larger Works. + You may create a Larger Work by combining Covered Code with other code + not governed by the terms of this License and distribute the Larger + Work as a single product. In such a case, You must make sure the + requirements of this License are fulfilled for the Covered Code. + +4. Inability to Comply Due to Statute or Regulation. + + If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Code due to + statute, judicial order, or regulation then You must: (a) comply with + the terms of this License to the maximum extent possible; and (b) + describe the limitations and the code they affect. Such description + must be included in the LEGAL file described in Section 3.4 and must + be included with all distributions of the Source Code. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Application of this License. + + This License applies to code to which the Initial Developer has + attached the notice in Exhibit A and to related Covered Code. + +6. Versions of the License. + + 6.1. New Versions. + Netscape Communications Corporation ("Netscape") may publish revised + and/or new versions of the License from time to time. Each version + will be given a distinguishing version number. + + 6.2. Effect of New Versions. 
+ Once Covered Code has been published under a particular version of the + License, You may always continue to use it under the terms of that + version. You may also choose to use such Covered Code under the terms + of any subsequent version of the License published by Netscape. No one + other than Netscape has the right to modify the terms applicable to + Covered Code created under this License. + + 6.3. Derivative Works. + If You create or use a modified version of this License (which you may + only do in order to apply it to code which is not already Covered Code + governed by this License), You must (a) rename Your license so that + the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", + "MPL", "NPL" or any confusingly similar phrase do not appear in your + license (except to note that your license differs from this License) + and (b) otherwise make it clear that Your version of the license + contains terms which differ from the Mozilla Public License and + Netscape Public License. (Filling in the name of the Initial + Developer, Original Code or Contributor in the notice described in + Exhibit A shall not of themselves be deemed to be modifications of + this License.) + +7. DISCLAIMER OF WARRANTY. + + COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF + DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. + THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE + IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, + YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE + COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER + OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +8. TERMINATION. + + 8.1. 
This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to cure + such breach within 30 days of becoming aware of the breach. All + sublicenses to the Covered Code which are properly granted shall + survive any termination of this License. Provisions which, by their + nature, must remain in effect beyond the termination of this License + shall survive. + + 8.2. If You initiate litigation by asserting a patent infringement + claim (excluding declatory judgment actions) against Initial Developer + or a Contributor (the Initial Developer or Contributor against whom + You file such action is referred to as "Participant") alleging that: + + (a) such Participant's Contributor Version directly or indirectly + infringes any patent, then any and all rights granted by such + Participant to You under Sections 2.1 and/or 2.2 of this License + shall, upon 60 days notice from Participant terminate prospectively, + unless if within 60 days after receipt of notice You either: (i) + agree in writing to pay Participant a mutually agreeable reasonable + royalty for Your past and future use of Modifications made by such + Participant, or (ii) withdraw Your litigation claim with respect to + the Contributor Version against such Participant. If within 60 days + of notice, a reasonable royalty and payment arrangement are not + mutually agreed upon in writing by the parties or the litigation claim + is not withdrawn, the rights granted by Participant to You under + Sections 2.1 and/or 2.2 automatically terminate at the expiration of + the 60 day notice period specified above. 
+ + (b) any software, hardware, or device, other than such Participant's + Contributor Version, directly or indirectly infringes any patent, then + any rights granted to You by such Participant under Sections 2.1(b) + and 2.2(b) are revoked effective as of the date You first made, used, + sold, distributed, or had made, Modifications made by that + Participant. + + 8.3. If You assert a patent infringement claim against Participant + alleging that such Participant's Contributor Version directly or + indirectly infringes any patent where such claim is resolved (such as + by license or settlement) prior to the initiation of patent + infringement litigation, then the reasonable value of the licenses + granted by such Participant under Sections 2.1 or 2.2 shall be taken + into account in determining the amount or value of any payment or + license. + + 8.4. In the event of termination under Sections 8.1 or 8.2 above, + all end user license agreements (excluding distributors and resellers) + which have been validly granted by You or any distributor hereunder + prior to termination shall survive termination. + +9. LIMITATION OF LIABILITY. + + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL + DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, + OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR + ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY + CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, + WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY + RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW + PROHIBITS SUCH LIMITATION. 
SOME JURISDICTIONS DO NOT ALLOW THE + EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO + THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +10. U.S. GOVERNMENT END USERS. + + The Covered Code is a "commercial item," as that term is defined in + 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer + software" and "commercial computer software documentation," as such + terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 + C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), + all U.S. Government End Users acquire Covered Code with only those + rights set forth herein. + +11. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. This License shall be governed by + California law provisions (except to the extent applicable law, if + any, provides otherwise), excluding its conflict-of-law provisions. + With respect to disputes in which at least one party is a citizen of, + or an entity chartered or registered to do business in the United + States of America, any litigation relating to this License shall be + subject to the jurisdiction of the Federal Courts of the Northern + District of California, with venue lying in Santa Clara County, + California, with the losing party responsible for costs, including + without limitation, court costs and reasonable attorneys' fees and + expenses. The application of the United Nations Convention on + Contracts for the International Sale of Goods is expressly excluded. + Any law or regulation which provides that the language of a contract + shall be construed against the drafter shall not apply to this + License. + +12. RESPONSIBILITY FOR CLAIMS. 
+ + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or indirectly, + out of its utilization of rights under this License and You agree to + work with Initial Developer and Contributors to distribute such + responsibility on an equitable basis. Nothing herein is intended or + shall be deemed to constitute any admission of liability. + +13. MULTIPLE-LICENSED CODE. + + Initial Developer may designate portions of the Covered Code as + "Multiple-Licensed". "Multiple-Licensed" means that the Initial + Developer permits you to utilize portions of the Covered Code under + Your choice of the NPL or the alternative licenses, if any, specified + by the Initial Developer in the file described in Exhibit A. + +EXHIBIT A -Mozilla Public License. + + ``The contents of this file are subject to the Mozilla Public License + Version 1.1 (the "License"); you may not use this file except in + compliance with the License. You may obtain a copy of the License at + http://www.mozilla.org/MPL/ + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific language governing rights and limitations + under the License. + + The Original Code is ______________________________________. + + The Initial Developer of the Original Code is ________________________. + Portions created by ______________________ are Copyright (C) ______ + _______________________. All Rights Reserved. + + Contributor(s): ______________________________________. + + Alternatively, the contents of this file may be used under the terms + of the _____ license (the "[___] License"), in which case the + provisions of [______] License are applicable instead of those + above. 
If you wish to allow use of your version of this file only + under the terms of the [____] License and not to allow others to use + your version of this file under the MPL, indicate your decision by + deleting the provisions above and replace them with the notice and + other provisions required by the [___] License. If you do not delete + the provisions above, a recipient may use your version of this file + under either the MPL or the [___] License." + + [NOTE: The text of this Exhibit A may differ slightly from the text of + the notices in the Source Code files of the Original Code. You should + use the text of this Exhibit A rather than the text found in the + Original Code Source Code for Your Modifications.] diff --git a/solr/licenses/juniversalchardet-NOTICE.txt b/solr/licenses/juniversalchardet-NOTICE.txt new file mode 100644 index 00000000000..0269c49838b --- /dev/null +++ b/solr/licenses/juniversalchardet-NOTICE.txt @@ -0,0 +1,6 @@ +Project home page: http://code.google.com/p/juniversalchardet/ +Java port by Kohei TAKETA (No copyright specified) + +The library is subject to the Mozilla Public License Version 1.1. +Alternatively, the library may be used under the terms of either the GNU General Public License Version 2 or later, +or the GNU Lesser General Public License 2.1 or later. 
\ No newline at end of file diff --git a/solr/licenses/pdfbox-1.6.0.jar.sha1 b/solr/licenses/pdfbox-1.6.0.jar.sha1 deleted file mode 100644 index c6b320f17e9..00000000000 --- a/solr/licenses/pdfbox-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4fdc454f4925cca53a7140bfc7a61c74f80b2dd8 diff --git a/solr/licenses/pdfbox-1.7.0.jar.sha1 b/solr/licenses/pdfbox-1.7.0.jar.sha1 new file mode 100644 index 00000000000..378b0aae73b --- /dev/null +++ b/solr/licenses/pdfbox-1.7.0.jar.sha1 @@ -0,0 +1 @@ +45c684543f4a59a22aa32ae14bb73ea66cbeccb6 diff --git a/solr/licenses/pdfbox-NOTICE.txt b/solr/licenses/pdfbox-NOTICE.txt index 06f5844677f..0d67b51ff03 100644 --- a/solr/licenses/pdfbox-NOTICE.txt +++ b/solr/licenses/pdfbox-NOTICE.txt @@ -1,10 +1,14 @@ - Apache PDFBox -Copyright 2002-2010 The Apache Software Foundation +Copyright 2011 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). - -Based on source code contributed to the original PDFBox project. +Based on source code originally developed in the PDFBox, JempBox and +FontBox projects. Copyright (c) 2002-2007, www.pdfbox.org +Copyright (c) 2006-2007, www.jempbox.org + +Based on source code originally developed in the PaDaF project. 
+Copyright (c) 2010 Atos Worldline SAS + diff --git a/solr/licenses/poi-3.8-beta5.jar.sha1 b/solr/licenses/poi-3.8-beta5.jar.sha1 deleted file mode 100644 index d6ad71c5383..00000000000 --- a/solr/licenses/poi-3.8-beta5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -44521521d5c9a945eb152fbdbbed1b60b0c11766 diff --git a/solr/licenses/poi-3.8.jar.sha1 b/solr/licenses/poi-3.8.jar.sha1 new file mode 100644 index 00000000000..a8115278b04 --- /dev/null +++ b/solr/licenses/poi-3.8.jar.sha1 @@ -0,0 +1 @@ +552a7703d32c57adb611df084b45f7158e8653f3 diff --git a/solr/licenses/poi-ooxml-3.8-beta5.jar.sha1 b/solr/licenses/poi-ooxml-3.8-beta5.jar.sha1 deleted file mode 100644 index 169d3c0747b..00000000000 --- a/solr/licenses/poi-ooxml-3.8-beta5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c73aa8c5d6cd0483f1680ea33407931b3151fa42 diff --git a/solr/licenses/poi-ooxml-3.8.jar.sha1 b/solr/licenses/poi-ooxml-3.8.jar.sha1 new file mode 100644 index 00000000000..9daaafac1d3 --- /dev/null +++ b/solr/licenses/poi-ooxml-3.8.jar.sha1 @@ -0,0 +1 @@ +235d18adccc2b140fb3f90a2fa859b7ae29d57b8 diff --git a/solr/licenses/poi-ooxml-schemas-3.8-beta5.jar.sha1 b/solr/licenses/poi-ooxml-schemas-3.8-beta5.jar.sha1 deleted file mode 100644 index c61898456b8..00000000000 --- a/solr/licenses/poi-ooxml-schemas-3.8-beta5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -171221d31fdac04a68956750b550b8f2aa8d8bbd diff --git a/solr/licenses/poi-ooxml-schemas-3.8.jar.sha1 b/solr/licenses/poi-ooxml-schemas-3.8.jar.sha1 new file mode 100644 index 00000000000..9ade50e03a6 --- /dev/null +++ b/solr/licenses/poi-ooxml-schemas-3.8.jar.sha1 @@ -0,0 +1 @@ +cb3b26809ec65eba22143acfddf654bcf70aa009 diff --git a/solr/licenses/poi-scratchpad-3.8-beta5.jar.sha1 b/solr/licenses/poi-scratchpad-3.8-beta5.jar.sha1 deleted file mode 100644 index 801029ebd67..00000000000 --- a/solr/licenses/poi-scratchpad-3.8-beta5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -21aa477fd4dcaefe8443f3c5f30dd9b8bec2e3b4 diff --git a/solr/licenses/poi-scratchpad-3.8.jar.sha1 
b/solr/licenses/poi-scratchpad-3.8.jar.sha1 new file mode 100644 index 00000000000..b952b556c09 --- /dev/null +++ b/solr/licenses/poi-scratchpad-3.8.jar.sha1 @@ -0,0 +1 @@ +33ef3eb7bd97c0dcdf2873b0e0a0938f013d410c diff --git a/solr/licenses/randomizedtesting-runner-2.0.0.rc5.jar.sha1 b/solr/licenses/randomizedtesting-runner-2.0.0.rc5.jar.sha1 new file mode 100644 index 00000000000..4cc4de98bd4 --- /dev/null +++ b/solr/licenses/randomizedtesting-runner-2.0.0.rc5.jar.sha1 @@ -0,0 +1 @@ +68dbb7c5d90e6b3606a4f207eefbd028d6a68c1a \ No newline at end of file diff --git a/solr/licenses/randomizedtesting-runner-LICENSE-ASL.txt b/solr/licenses/randomizedtesting-runner-LICENSE-ASL.txt new file mode 100755 index 00000000000..7a4a3ea2424 --- /dev/null +++ b/solr/licenses/randomizedtesting-runner-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/solr/licenses/randomizedtesting-runner-NOTICE.txt b/solr/licenses/randomizedtesting-runner-NOTICE.txt new file mode 100755 index 00000000000..e657788259e --- /dev/null +++ b/solr/licenses/randomizedtesting-runner-NOTICE.txt @@ -0,0 +1,12 @@ + +RandomizedRunner, a JUnit @Runner for randomized tests (and more) +Copyright 2011-2012 Carrot Search s.c. +http://labs.carrotsearch.com/randomizedtesting.html + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ +This product includes asm (asmlib), BSD license +This product includes Google Guava, ASL license +This product includes simple-xml, ASL license +This product includes Google GSON, ASL license diff --git a/solr/licenses/scannotation-1.0.2.jar.sha1 b/solr/licenses/scannotation-1.0.2.jar.sha1 deleted file mode 100644 index 9a2bf962c4b..00000000000 --- a/solr/licenses/scannotation-1.0.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e2b4559236410970da0494ca2a24991ccb53fca7 diff --git a/solr/licenses/tika-core-1.1.jar.sha1 b/solr/licenses/tika-core-1.1.jar.sha1 deleted file mode 100644 index 5af1c9fd4a7..00000000000 --- a/solr/licenses/tika-core-1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3bf3a154705ef65df9dbe70be44c2066d1ee8a9e diff --git a/solr/licenses/tika-core-1.2.jar.sha1 b/solr/licenses/tika-core-1.2.jar.sha1 new file mode 100644 index 00000000000..29d34e76334 --- /dev/null +++ b/solr/licenses/tika-core-1.2.jar.sha1 @@ -0,0 +1 @@ +d17ad28bb6030b58449edeed397d87a5661e8cc1 diff --git a/solr/licenses/tika-core-NOTICE.txt b/solr/licenses/tika-core-NOTICE.txt index ceecc5f4da9..a1bf6205886 100644 --- a/solr/licenses/tika-core-NOTICE.txt +++ b/solr/licenses/tika-core-NOTICE.txt @@ -1,8 +1,15 @@ - -Apache Tika core -Copyright 2007-2010 The Apache Software Foundation +Apache Tika +Copyright 2011 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). +Copyright 1993-2010 University Corporation for Atmospheric Research/Unidata +This software contains code derived from UCAR/Unidata's NetCDF library. +Tika-server compoment uses CDDL-licensed dependencies: jersey (http://jersey.java.net/) and +Grizzly (http://grizzly.java.net/) + +OpenCSV: Copyright 2005 Bytecode Pty Ltd. Licensed under the Apache License, Version 2.0 + +IPTC Photo Metadata descriptions Copyright 2010 International Press Telecommunications Council. 
diff --git a/solr/licenses/tika-parsers-1.1.jar.sha1 b/solr/licenses/tika-parsers-1.1.jar.sha1 deleted file mode 100644 index acac68d29be..00000000000 --- a/solr/licenses/tika-parsers-1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ffdc50beb2f969ef67943726858649572af670e1 diff --git a/solr/licenses/tika-parsers-1.2.jar.sha1 b/solr/licenses/tika-parsers-1.2.jar.sha1 new file mode 100644 index 00000000000..c26487e3bdc --- /dev/null +++ b/solr/licenses/tika-parsers-1.2.jar.sha1 @@ -0,0 +1 @@ +ddc3e6ad93bca50e5a0267087d8ced26863bd64d diff --git a/solr/licenses/tika-parsers-NOTICE.txt b/solr/licenses/tika-parsers-NOTICE.txt index ff78126d9e6..4a1d9e082e3 100644 --- a/solr/licenses/tika-parsers-NOTICE.txt +++ b/solr/licenses/tika-parsers-NOTICE.txt @@ -1,8 +1,15 @@ - Apache Tika parsers -Copyright 2007-2010 The Apache Software Foundation +Copyright 2011 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). +Copyright 1993-2010 University Corporation for Atmospheric Research/Unidata +This software contains code derived from UCAR/Unidata's NetCDF library. +Tika-server compoment uses CDDL-licensed dependencies: jersey (http://jersey.java.net/) and +Grizzly (http://grizzly.java.net/) + +OpenCSV: Copyright 2005 Bytecode Pty Ltd. Licensed under the Apache License, Version 2.0 + +IPTC Photo Metadata descriptions Copyright 2010 International Press Telecommunications Council. 
diff --git a/solr/licenses/xz-1.0.jar.sha1 b/solr/licenses/xz-1.0.jar.sha1 new file mode 100644 index 00000000000..9e449a135b4 --- /dev/null +++ b/solr/licenses/xz-1.0.jar.sha1 @@ -0,0 +1 @@ +ecff5cb8b1189514c9d1d8d68eb77ac372e000c9 diff --git a/solr/licenses/xz-LICENSE-PD.txt b/solr/licenses/xz-LICENSE-PD.txt new file mode 100644 index 00000000000..6433d1d9c79 --- /dev/null +++ b/solr/licenses/xz-LICENSE-PD.txt @@ -0,0 +1,8 @@ +XZ for Java 1.0 (2011-10-22) + +http://tukaani.org/xz/java.html + +This Java implementation of XZ has been put into the public domain, +thus you can do whatever you want with it. All the files in the package +have been written by Lasse Collin, but some files are heavily based +on public domain code written by Igor Pavlov. \ No newline at end of file diff --git a/solr/licenses/xz-NOTICE.txt b/solr/licenses/xz-NOTICE.txt new file mode 100644 index 00000000000..6433d1d9c79 --- /dev/null +++ b/solr/licenses/xz-NOTICE.txt @@ -0,0 +1,8 @@ +XZ for Java 1.0 (2011-10-22) + +http://tukaani.org/xz/java.html + +This Java implementation of XZ has been put into the public domain, +thus you can do whatever you want with it. All the files in the package +have been written by Lasse Collin, but some files are heavily based +on public domain code written by Igor Pavlov. 
\ No newline at end of file diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java b/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java index 7f154845054..b47f89e36a3 100755 --- a/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java @@ -76,6 +76,7 @@ public abstract class ContentStreamBase implements ContentStream sourceInfo = "url"; } + @Override public InputStream getStream() throws IOException { URLConnection conn = this.url.openConnection(); @@ -102,37 +103,33 @@ public abstract class ContentStreamBase implements ContentStream sourceInfo = file.toURI().toString(); } + @Override public String getContentType() { if(contentType==null) { + InputStream stream = null; try { - char first = (char)new FileInputStream( file ).read(); + stream = new FileInputStream(file); + char first = (char)stream.read(); if(first == '<') { return "application/xml"; } if(first == '{') { return "application/json"; } + } catch(Exception ex) { + } finally { + if (stream != null) try { + stream.close(); + } catch (IOException ioe) {} } - catch(Exception ex) {} } return contentType; } + @Override public InputStream getStream() throws IOException { return new FileInputStream( file ); } - - /** - * If an charset is defined (by the contentType) use that, otherwise - * use a UTF-8 reader - */ - @Override - public Reader getReader() throws IOException { - String charset = getCharsetFromContentType( contentType ); - return charset == null - ? 
new InputStreamReader(getStream(), "UTF-8") - : new InputStreamReader( getStream(), charset ); - } } @@ -152,6 +149,7 @@ public abstract class ContentStreamBase implements ContentStream sourceInfo = "string"; } + @Override public String getContentType() { if(contentType==null && str.length() > 0) { char first = str.charAt(0); @@ -166,6 +164,7 @@ public abstract class ContentStreamBase implements ContentStream return contentType; } + @Override public InputStream getStream() throws IOException { return new ByteArrayInputStream( str.getBytes(DEFAULT_CHARSET) ); } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java b/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java index 2dd565d9638..89c3fa6e4a8 100755 --- a/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java @@ -57,8 +57,8 @@ public class FastOutputStream extends OutputStream implements DataOutput { public void write(byte b) throws IOException { if (pos >= buf.length) { - flush(buf, 0, buf.length); written += pos; + flush(buf, 0, buf.length); pos=0; } buf[pos++] = b; @@ -66,29 +66,40 @@ public class FastOutputStream extends OutputStream implements DataOutput { @Override public void write(byte arr[], int off, int len) throws IOException { - int space = buf.length - pos; - if (len < space) { - System.arraycopy(arr, off, buf, pos, len); - pos += len; - } else if (len0) { - flush(buf,0,pos); // flush - written += pos; - pos=0; + + for(;;) { + int space = buf.length - pos; + + if (len <= space) { + System.arraycopy(arr, off, buf, pos, len); + pos += len; + return; + } else if (len > buf.length) { + if (pos>0) { + flush(buf,0,pos); // flush + written += pos; + pos=0; + } + // don't buffer, just write to sink + flush(arr, off, len); + written += len; + return; } - // don't buffer, just write to sink - flush(arr, off, len); - written += len; + + // buffer is too big to fit in the 
free space, but + // not big enough to warrant writing on its own. + // write whatever we can fit, then flush and iterate. + + System.arraycopy(arr, off, buf, pos, space); + written += buf.length; // important to do this first, since buf.length can change after a flush! + flush(buf, 0, buf.length); + pos = 0; + off += space; + len -= space; } } + /** reserve at least len bytes at the end of the buffer. * Invalid if len > buffer.length * @param len @@ -182,8 +193,8 @@ public class FastOutputStream extends OutputStream implements DataOutput { */ public void flushBuffer() throws IOException { if (pos > 0) { - flush(buf, 0, pos); written += pos; + flush(buf, 0, pos); pos=0; } } diff --git a/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java b/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java index a63d1bdd4cb..65dae081a3f 100755 --- a/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java +++ b/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java @@ -17,22 +17,16 @@ package org.apache.solr.common.util; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.StringReader; -import java.net.ConnectException; -import java.net.HttpURLConnection; import java.net.URL; -import java.net.URLConnection; import org.apache.commons.io.IOUtils; import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.core.SolrResourceLoader; /** @@ -66,50 +60,17 @@ public class ContentStreamTest extends LuceneTestCase public void testURLStream() throws IOException { - byte[] content = null; - String contentType = null; - URL url = new URL( "http://svn.apache.org/repos/asf/lucene/dev/trunk/" ); - InputStream in = null; - try { - HttpURLConnection conn = 
(HttpURLConnection)url.openConnection(); - conn.setConnectTimeout(1000); - conn.setReadTimeout(1000); - conn.connect(); - int code = conn.getResponseCode(); - assumeTrue("wrong response code from server: " + code, 200 == code); - in = conn.getInputStream(); - contentType = conn.getContentType(); - content = IOUtils.toByteArray(in); - - assumeTrue("not enough content for test to be useful", - content.length > 10 ); - - } catch (IOException ex) { - assumeNoException("Unable to connect to " + url + " to run the test.", ex); - }finally { - if (in != null) { - IOUtils.closeQuietly(in); - } - } + InputStream is = new SolrResourceLoader(null, null).openResource( "solrj/README" ); + assertNotNull( is ); + File file = new File(TEMP_DIR, "README"); + FileOutputStream os = new FileOutputStream(file); + IOUtils.copy(is, os); + os.close(); - - ContentStreamBase stream = new ContentStreamBase.URLStream( url ); - in = stream.getStream(); // getStream is needed before getSize is valid - assertEquals( content.length, stream.getSize().intValue() ); - - try { - assertTrue( IOUtils.contentEquals( - new ByteArrayInputStream(content), in ) ); - } - finally { - IOUtils.closeQuietly(in); - } - - String charset = ContentStreamBase.getCharsetFromContentType(contentType); - if (charset == null) - charset = ContentStreamBase.DEFAULT_CHARSET; - // Re-open the stream and this time use a reader - stream = new ContentStreamBase.URLStream( url ); - assertTrue( IOUtils.contentEquals( new StringReader(new String(content, charset)), stream.getReader() ) ); + ContentStreamBase stream = new ContentStreamBase.URLStream( new URL(file.toURI().toASCIIString()) ); + assertTrue( IOUtils.contentEquals( new FileInputStream( file ), stream.getStream() ) ); + assertEquals( file.length(), stream.getSize().intValue() ); + assertTrue( IOUtils.contentEquals( new InputStreamReader(new FileInputStream(file), "UTF-8"), stream.getReader() ) ); + assertEquals( file.length(), stream.getSize().intValue() ); } } diff --git 
a/solr/test-framework/README.txt b/solr/test-framework/README.txt new file mode 100644 index 00000000000..f19e6ed8f20 --- /dev/null +++ b/solr/test-framework/README.txt @@ -0,0 +1,6 @@ +The Solr test-framework provides base classes and utility classes for +writing JUnit tests exercising Solr functionality. + +This test framework relies on the Lucene components found in the +./lucene-libs/ directory, as well as the third-party libraries found +in the ./lib directory. diff --git a/solr/test-framework/build.xml b/solr/test-framework/build.xml index 05699afffdd..ead58797469 100644 --- a/solr/test-framework/build.xml +++ b/solr/test-framework/build.xml @@ -20,10 +20,14 @@ - - - - + + + + + + + + @@ -39,19 +43,26 @@ - + + depends="compile-core,jar-test-framework,lucene-javadocs,javadocs-test-framework,define-lucene-javadoc-url"> + - - - - + title="${Name} ${version} Test Framework API"> + + + + @@ -60,5 +71,28 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/test-framework/ivy.xml b/solr/test-framework/ivy.xml index 812984d0175..67a2c048bdd 100644 --- a/solr/test-framework/ivy.xml +++ b/solr/test-framework/ivy.xml @@ -17,5 +17,25 @@ under the License. --> - + + + + + + + + + + + + + + + + + + diff --git a/solr/test-framework/src/java/org/apache/solr/core/MockDirectoryFactory.java b/solr/test-framework/src/java/org/apache/solr/core/MockDirectoryFactory.java index d0061a7c91b..4c55dd376be 100644 --- a/solr/test-framework/src/java/org/apache/solr/core/MockDirectoryFactory.java +++ b/solr/test-framework/src/java/org/apache/solr/core/MockDirectoryFactory.java @@ -32,10 +32,12 @@ public class MockDirectoryFactory extends CachingDirectoryFactory { @Override protected Directory create(String path) throws IOException { Directory dir = LuceneTestCase.newDirectory(); - // Somehow removing unref'd files in Solr tests causes - // problems... there's some interaction w/ - // CachingDirectoryFactory. 
Once we track down where Solr - // isn't closing an IW, we can re-enable this: + // we can't currently do this check because of how + // Solr has to reboot a new Directory sometimes when replicating + // or rolling back - the old directory is closed and the following + // test assumes it can open an IndexWriter when that happens - we + // have a new Directory for the same dir and still an open IW at + // this point if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).setAssertNoUnrefencedFilesOnClose(false); } diff --git a/solr/test-framework/src/java/org/apache/solr/core/MockFSDirectoryFactory.java b/solr/test-framework/src/java/org/apache/solr/core/MockFSDirectoryFactory.java index c83e602f9e9..4a23fbc5d5f 100644 --- a/solr/test-framework/src/java/org/apache/solr/core/MockFSDirectoryFactory.java +++ b/solr/test-framework/src/java/org/apache/solr/core/MockFSDirectoryFactory.java @@ -32,10 +32,12 @@ public class MockFSDirectoryFactory extends CachingDirectoryFactory { @Override public Directory create(String path) throws IOException { Directory dir = LuceneTestCase.newFSDirectory(new File(path)); - // Somehow removing unref'd files in Solr tests causes - // problems... there's some interaction w/ - // CachingDirectoryFactory. Once we track down where Solr - // isn't closing an IW, we can re-enable this: + // we can't currently do this check because of how + // Solr has to reboot a new Directory sometimes when replicating + // or rolling back - the old directory is closed and the following + // test assumes it can open an IndexWriter when that happens - we + // have a new Directory for the same dir and still an open IW at + // this point if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).setAssertNoUnrefencedFilesOnClose(false); }