mirror of https://github.com/apache/lucene.git
LUCENE-3892: merge trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1374578 13f79535-47bb-0310-9956-ffa450edef68
commit bfcd96c689
@@ -145,21 +145,11 @@
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/bcmail-jdk15-1.45.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/bcprov-jdk15-1.45.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/boilerpipe-1.1.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.3.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/dom4j-1.6.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.6.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.6.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/metadata-extractor-2.4.0-beta-1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/netcdf-4.2-min.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.6.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8-beta5.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8-beta5.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8-beta5.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8-beta5.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/rome-0.9.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/tagsoup-1.2.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-1.1-20120112.jar"/>
  <classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.7.jar"/>
@@ -175,5 +165,25 @@
  <classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
  <classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-core-0.7.2.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-dom-0.7.2.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.4.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.7.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/icu4j-49.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/isoparser-1.0-RC-1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/jdom-1.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.7.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/juniversalchardet-1.0.3.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.7.0.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.2.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.2.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-core-0.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-tika-0.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/xercesImpl-2.9.1.jar"/>
  <classpathentry kind="lib" path="solr/contrib/extraction/lib/xz-1.0.jar"/>
  <classpathentry kind="output" path="bin/other"/>
</classpath>
@@ -75,6 +75,14 @@ Bug Fixes
  encoders / stemmers via the ResourceLoader now instead of Class.forName().
  Solr users should no longer have to embed these in their war. (David Smiley)

* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
  Also, ensure immutability and use only one instance of this table in RAM (lazy
  loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)

* LUCENE-4310: MappingCharFilter was failing to match input strings
  containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir,
  Mike McCandless)

Build

* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
@@ -111,9 +111,8 @@ public class NormalizeCharMap {
      final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
      final IntsRef scratch = new IntsRef();
      for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
        builder.add(Util.toUTF32(ent.getKey(), scratch),
        builder.add(Util.toUTF16(ent.getKey(), scratch),
                    new CharsRef(ent.getValue()));
      }
      map = builder.finish();
      pendingPairs.clear();
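The one-line change above (Util.toUTF32 to Util.toUTF16) is the core of the LUCENE-4310 fix listed in CHANGES: MappingCharFilter consumes its input one char (UTF-16 unit) at a time, so FST keys built from whole code points can never line up with text containing surrogate pairs. A self-contained sketch of the distinction for U+1D122, the character the new test below uses (plain JDK, no Lucene classes):

public class Utf16VsUtf32 {
  public static void main(String[] args) {
    // U+1D122 (MUSICAL SYMBOL F CLEF) lies outside the BMP: one code point, two UTF-16 units.
    String fclef = new String(Character.toChars(0x1D122));

    // UTF-16 view: the two surrogate chars a char filter actually reads.
    System.out.println("UTF-16 units: " + fclef.length());                 // 2
    for (int i = 0; i < fclef.length(); i++) {
      System.out.printf("  unit %d = U+%04X%n", i, (int) fclef.charAt(i));
    }

    // UTF-32 view: a single code point; keys built from these can never be
    // matched by a reader that advances one char at a time.
    System.out.println("code points: " + fclef.codePointCount(0, fclef.length())); // 1
    System.out.printf("  code point = U+%X%n", fclef.codePointAt(0));
  }
}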
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;

public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@@ -55,6 +56,11 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {

    builder.add( "empty", "" );

    // non-BMP (surrogate pair):
    builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");

    builder.add("\uff01", "full-width-exclamation");

    normMap = builder.build();
  }

@@ -128,6 +134,18 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
    assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
  }

  public void testNonBMPChar() throws Exception {
    CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
    assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
  }

  public void testFullWidthChar() throws Exception {
    CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
    assertTokenStreamContents(ts, new String[]{"full-width-exclamation"}, new int[]{0}, new int[]{1}, 1);
  }

  //
  // 1111111111222
  // 01234567890123456789012
@@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
    return DefaultsHolder.DEFAULT_STOP_SET;
  }

  /**
   * Returns an unmodifiable instance of the default stemmer table.
   */
  public static Trie getDefaultTable() {
    return DefaultsHolder.DEFAULT_TABLE;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.;
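The new getDefaultTable() above pairs with the SOLR-3737 CHANGES entry: the large stemmer table is loaded lazily, exactly once, through a static holder class. The hunk does not show DefaultsHolder itself, so the following is only a sketch of the standard initialization-on-demand holder idiom it relies on; the int[] table and the "loading" stand-in are illustrative, not PolishAnalyzer's actual fields.

public final class LazyTableExample {

  /** Nested holder: not initialized until getDefaultTable() first touches it. */
  private static final class DefaultsHolder {
    static final int[] DEFAULT_TABLE = loadTable();   // runs once; JVM class-init locking makes it thread-safe

    private static int[] loadTable() {
      // Stand-in for StempelStemmer.load(...) reading the large stemmer_20000.tbl resource.
      System.out.println("loading table...");
      return new int[20000];
    }
  }

  /** Every caller shares the single, lazily created instance. */
  public static int[] getDefaultTable() {
    return DefaultsHolder.DEFAULT_TABLE;
  }

  public static void main(String[] args) {
    System.out.println("before first access");                  // nothing loaded yet
    System.out.println(getDefaultTable() == getDefaultTable()); // true: one instance in RAM
  }
}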
@@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;

/**
 * Factory for {@link StempelFilter} using a Polish stemming table.
 */
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
  private Trie stemmer = null;
  private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";

public class StempelPolishStemFilterFactory extends TokenFilterFactory {
  public TokenStream create(TokenStream input) {
    return new StempelFilter(input, new StempelStemmer(stemmer));
  }

  public void inform(ResourceLoader loader) throws IOException {
    stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
    return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
  }
}
@@ -332,7 +332,7 @@ public class Trie {
   * @param key the key
   * @param cmd the patch command
   */
  public void add(CharSequence key, CharSequence cmd) {
  void add(CharSequence key, CharSequence cmd) {
    if (key == null || cmd == null) {
      return;
    }
@@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;

/**
 * Tests for {@link StempelPolishStemFilterFactory}
@@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
  public void testBasics() throws Exception {
    StringReader document = new StringReader("studenta studenci");
    StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
    assertTokenStreamContents(ts,
        new String[] { "student", "student" });
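After this change the factory no longer needs a ResourceLoader, so a caller can build the analysis chain directly. A hedged sketch of the resulting usage, mirroring the calls visible in the test above rather than quoting it verbatim:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.stempel.StempelPolishStemFilterFactory;
import org.apache.lucene.util.Version;

public class StempelFactoryUsage {
  /** Builds a Polish stemming chain; no inform(ResourceLoader) step is needed any more. */
  static TokenStream polishStems(String text, Version matchVersion) {
    StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
    // The factory now draws its Trie from PolishAnalyzer.getDefaultTable(),
    // so the one large stemmer table is shared across all factory instances.
    return factory.create(new WhitespaceTokenizer(matchVersion, new StringReader(text)));
  }
}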
@ -234,10 +234,10 @@
|
|||
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/benchmark" level="class"/>
|
||||
<!-- core: problems -->
|
||||
<check-missing-javadocs dir="build/docs/core" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/demo" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/facet" level="class"/>
|
||||
<!-- grouping: problems -->
|
||||
<check-missing-javadocs dir="build/docs/grouping" level="class"/>
|
||||
<!-- highlighter: problems -->
|
||||
<check-missing-javadocs dir="build/docs/join" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/memory" level="class"/>
|
||||
|
@ -247,7 +247,7 @@
|
|||
<check-missing-javadocs dir="build/docs/sandbox" level="class"/>
|
||||
<!-- spatial: problems -->
|
||||
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
|
||||
<!-- test-framework: problems -->
|
||||
<check-missing-javadocs dir="build/docs/test-framework" level="class"/>
|
||||
</sequential>
|
||||
</target>
|
||||
|
||||
|
|
|
@ -26,11 +26,15 @@ import org.apache.lucene.index.TermState;
|
|||
* terms dict.
|
||||
*/
|
||||
public class BlockTermState extends OrdTermState {
|
||||
public int docFreq; // how many docs have this term
|
||||
public long totalTermFreq; // total number of occurrences of this term
|
||||
/** how many docs have this term */
|
||||
public int docFreq;
|
||||
/** total number of occurrences of this term */
|
||||
public long totalTermFreq;
|
||||
|
||||
public int termBlockOrd; // the term's ord in the current block
|
||||
public long blockFilePointer; // fp into the terms dict primary file (_X.tim) that holds this term
|
||||
/** the term's ord in the current block */
|
||||
public int termBlockOrd;
|
||||
/** fp into the terms dict primary file (_X.tim) that holds this term */
|
||||
public long blockFilePointer;
|
||||
|
||||
@Override
|
||||
public void copyFrom(TermState _other) {
|
||||
|
|
|
@ -36,7 +36,7 @@ import org.apache.lucene.util.MathUtil;
|
|||
*/
|
||||
|
||||
public abstract class MultiLevelSkipListReader {
|
||||
// the maximum number of skip levels possible for this index
|
||||
/** the maximum number of skip levels possible for this index */
|
||||
protected int maxNumberOfSkipLevels;
|
||||
|
||||
// number of levels in this skip list
|
||||
|
|
|
@ -52,7 +52,7 @@ import org.apache.lucene.util.MathUtil;
|
|||
*/
|
||||
|
||||
public abstract class MultiLevelSkipListWriter {
|
||||
// number of levels in this skip list
|
||||
/** number of levels in this skip list */
|
||||
protected int numberOfSkipLevels;
|
||||
|
||||
// the skip interval in the list with level = 0
|
||||
|
@ -93,8 +93,8 @@ public abstract class MultiLevelSkipListWriter {
|
|||
}
|
||||
}
|
||||
|
||||
/** creates new buffers or empties the existing ones */
|
||||
protected void resetSkip() {
|
||||
// creates new buffers or empties the existing ones
|
||||
if (skipBuffer == null) {
|
||||
init();
|
||||
} else {
|
||||
|
|
|
@ -1796,7 +1796,7 @@ public class DirectPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
// Docs + freqs:
|
||||
public final static class HighFreqDocsEnum extends DocsEnum {
|
||||
private final static class HighFreqDocsEnum extends DocsEnum {
|
||||
private int[] docIDs;
|
||||
private int[] freqs;
|
||||
private final Bits liveDocs;
|
||||
|
@ -1969,7 +1969,7 @@ public class DirectPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
// TODO: specialize offsets and not
|
||||
public final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
||||
private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
||||
private int[] docIDs;
|
||||
private int[] freqs;
|
||||
private int[][] positions;
|
||||
|
|
|
@ -36,7 +36,7 @@ public abstract class IntIndexInput implements Closeable {
|
|||
|
||||
public abstract Index index() throws IOException;
|
||||
|
||||
// TODO: -- can we simplify this?
|
||||
/** Records a single skip-point in the {@link IntIndexInput.Reader}. */
|
||||
public abstract static class Index {
|
||||
|
||||
public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
|
||||
|
@ -50,6 +50,7 @@ public abstract class IntIndexInput implements Closeable {
|
|||
public abstract Index clone();
|
||||
}
|
||||
|
||||
/** Reads int values. */
|
||||
public abstract static class Reader {
|
||||
|
||||
/** Reads next single int */
|
||||
|
|
|
@ -38,6 +38,7 @@ public abstract class IntIndexOutput implements Closeable {
|
|||
* >= 0. */
|
||||
public abstract void write(int v) throws IOException;
|
||||
|
||||
/** Records a single skip-point in the IndexOutput. */
|
||||
public abstract static class Index {
|
||||
|
||||
/** Internally records the current location */
|
||||
|
|
|
@ -22,8 +22,15 @@ import org.apache.lucene.store.IOContext;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
/** @lucene.experimental */
|
||||
/** Provides int reader and writer to specified files.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public abstract class IntStreamFactory {
|
||||
/** Create an {@link IntIndexInput} on the provided
|
||||
* fileName. */
|
||||
public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException;
|
||||
|
||||
/** Create an {@link IntIndexOutput} on the provided
|
||||
* fileName. */
|
||||
public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException;
|
||||
}
|
||||
|
|
|
@ -119,10 +119,13 @@ public class DocTermOrds {
|
|||
protected final String field;
|
||||
|
||||
protected int numTermsInField;
|
||||
protected long termInstances; // total number of references to term numbers
|
||||
/** total number of references to term numbers */
|
||||
protected long termInstances;
|
||||
private long memsz;
|
||||
protected int total_time; // total time to uninvert the field
|
||||
protected int phase1_time; // time for phase1 of the uninvert process
|
||||
/** total time to uninvert the field */
|
||||
protected int total_time;
|
||||
/** time for phase1 of the uninvert process */
|
||||
protected int phase1_time;
|
||||
|
||||
protected int[] index;
|
||||
protected byte[][] tnums = new byte[256][];
|
||||
|
@ -234,7 +237,7 @@ public class DocTermOrds {
|
|||
protected void setActualDocFreq(int termNum, int df) throws IOException {
|
||||
}
|
||||
|
||||
// Call this only once (if you subclass!)
|
||||
/** Call this only once (if you subclass!) */
|
||||
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
|
||||
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
|
||||
final long startTime = System.currentTimeMillis();
|
||||
|
|
|
@ -267,11 +267,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
|
||||
}
|
||||
|
||||
// NOTE: this method does not carry over termVector
|
||||
// booleans nor docValuesType; the indexer chain
|
||||
// (TermVectorsConsumerPerField, DocFieldProcessor) must
|
||||
// set these fields when they succeed in consuming
|
||||
// the document:
|
||||
/** NOTE: this method does not carry over termVector
|
||||
* booleans nor docValuesType; the indexer chain
|
||||
* (TermVectorsConsumerPerField, DocFieldProcessor) must
|
||||
* set these fields when they succeed in consuming
|
||||
* the document */
|
||||
public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) {
|
||||
// TODO: really, indexer shouldn't even call this
|
||||
// method (it's only called from DocFieldProcessor);
|
||||
|
|
|
@ -243,6 +243,10 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Thrown when a merge was explicity aborted because
|
||||
* {@link IndexWriter#close(boolean)} was called with
|
||||
* <code>false</code>. Normally this exception is
|
||||
* privately caught and suppresed by {@link IndexWriter}. */
|
||||
public static class MergeAbortedException extends IOException {
|
||||
public MergeAbortedException() {
|
||||
super("merge is aborted");
|
||||
|
|
|
@@ -29,6 +29,9 @@ import org.apache.lucene.util.packed.PackedInts;
 * @lucene.experimental */
public class MergeState {

  /**
   * Remaps docids around deletes during merge
   */
  public static abstract class DocMap {
    private final Bits liveDocs;
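The DocMap javadoc above is terse, so here is a small, self-contained sketch of what "remaps docids around deletes" means: deleted documents are skipped during the merge, and the surviving ones are renumbered densely. Names and types are illustrative, not Lucene's implementation.

import java.util.BitSet;

final class DeleteAwareDocMap {
  private final int[] oldToNew;   // -1 for deleted docs

  /** liveDocs.get(i) == true means doc i survives the merge. */
  DeleteAwareDocMap(BitSet liveDocs, int maxDoc) {
    oldToNew = new int[maxDoc];
    int newDocId = 0;
    for (int oldDocId = 0; oldDocId < maxDoc; oldDocId++) {
      oldToNew[oldDocId] = liveDocs.get(oldDocId) ? newDocId++ : -1;
    }
  }

  /** New (post-merge) id for an old id, or -1 if the doc was deleted. */
  int get(int oldDocId) {
    return oldToNew[oldDocId];
  }
}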
@ -197,6 +200,9 @@ public class MergeState {
|
|||
public SegmentReader[] matchingSegmentReaders;
|
||||
public int matchedCount;
|
||||
|
||||
/**
|
||||
* Class for recording units of work when merging segments.
|
||||
*/
|
||||
public static class CheckAbort {
|
||||
private double workCount;
|
||||
private final MergePolicy.OneMerge merge;
|
||||
|
|
|
@ -43,7 +43,7 @@ import org.apache.lucene.util.packed.PackedInts.Reader;
|
|||
* @lucene.experimental
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class MultiDocValues extends DocValues {
|
||||
class MultiDocValues extends DocValues {
|
||||
|
||||
private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller();
|
||||
private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() {
|
||||
|
|
|
@ -143,6 +143,8 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
|||
}
|
||||
|
||||
// TODO: implement bulk read more efficiently than super
|
||||
/** Holds a {@link DocsAndPositionsEnum} along with the
|
||||
* corresponding {@link ReaderSlice}. */
|
||||
public final static class EnumWithSlice {
|
||||
public DocsAndPositionsEnum docsAndPositionsEnum;
|
||||
public ReaderSlice slice;
|
||||
|
|
|
@ -122,6 +122,8 @@ public final class MultiDocsEnum extends DocsEnum {
|
|||
}
|
||||
|
||||
// TODO: implement bulk read more efficiently than super
|
||||
/** Holds a {@link DocsEnum} along with the
|
||||
* corresponding {@link ReaderSlice}. */
|
||||
public final static class EnumWithSlice {
|
||||
public DocsEnum docsEnum;
|
||||
public ReaderSlice slice;
|
||||
|
|
|
@ -133,7 +133,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
|
|||
// or wrote; this is normally the same as generation except if
|
||||
// there was an IOException that had interrupted a commit
|
||||
|
||||
public Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
|
||||
/** Opaque Map<String, String> that user can specify during IndexWriter.commit */
|
||||
public Map<String,String> userData = Collections.<String,String>emptyMap();
|
||||
|
||||
private List<SegmentInfoPerCommit> segments = new ArrayList<SegmentInfoPerCommit>();
|
||||
|
||||
|
|
|
@ -30,11 +30,11 @@ public class SegmentReadState {
|
|||
public final FieldInfos fieldInfos;
|
||||
public final IOContext context;
|
||||
|
||||
// NOTE: if this is < 0, that means "defer terms index
|
||||
// load until needed". But if the codec must load the
|
||||
// terms index on init (preflex is the only once currently
|
||||
// that must do so), then it should negate this value to
|
||||
// get the app's terms divisor:
|
||||
/** NOTE: if this is < 0, that means "defer terms index
|
||||
* load until needed". But if the codec must load the
|
||||
* terms index on init (preflex is the only once currently
|
||||
* that must do so), then it should negate this value to
|
||||
* get the app's terms divisor */
|
||||
public int termsIndexDivisor;
|
||||
public final String segmentSuffix;
|
||||
|
||||
|
|
|
@@ -33,11 +33,11 @@ public class SegmentWriteState {
  public final FieldInfos fieldInfos;
  public int delCountOnFlush;

  // Deletes to apply while we are flushing the segment. A
  // Term is enrolled in here if it was deleted at one
  // point, and it's mapped to the docIDUpto, meaning any
  // docID < docIDUpto containing this term should be
  // deleted.
  /** Deletes to apply while we are flushing the segment. A
   * Term is enrolled in here if it was deleted at one
   * point, and it's mapped to the docIDUpto, meaning any
   * docID < docIDUpto containing this term should be
   * deleted. */
  public final BufferedDeletes segDeletes;

  // Lazily created:
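The segDeletes comment above encodes a subtle rule: each deleted term maps to a docIDUpto, and only documents with a smaller id are affected, so documents added after the delete keep the term. A toy sketch of that rule using an in-memory postings map; the types and names here are hypothetical, not Lucene's BufferedDeletes.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

final class BufferedDeletesSketch {
  /** term -> docIDUpto: delete docs containing the term whose id is < docIDUpto. */
  private final Map<String, Integer> deletedTerms = new HashMap<>();

  void bufferDelete(String term, int docIDUpto) {
    // Keep the largest upto seen for the term.
    deletedTerms.merge(term, docIDUpto, Math::max);
  }

  /** Returns ids of the documents that survive, given term -> posting list of doc ids. */
  Set<Integer> apply(Map<String, int[]> postings, int maxDoc) {
    Set<Integer> deleted = new HashSet<>();
    for (Map.Entry<String, Integer> e : deletedTerms.entrySet()) {
      for (int doc : postings.getOrDefault(e.getKey(), new int[0])) {
        if (doc < e.getValue()) {        // docs added after the delete are kept
          deleted.add(doc);
        }
      }
    }
    Set<Integer> alive = new TreeSet<>();
    for (int doc = 0; doc < maxDoc; doc++) {
      if (!deleted.contains(doc)) alive.add(doc);
    }
    return alive;
  }
}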
@ -32,6 +32,9 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Utility class for merging SortedBytes DocValues
|
||||
* instances.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class SortedBytesMergeUtils {
|
||||
|
@ -54,7 +57,14 @@ public final class SortedBytesMergeUtils {
|
|||
}
|
||||
return new MergeContext(comp, mergeDocCount, size, type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates contextual information about the merge.
|
||||
* This class holds document id to ordinal mappings, offsets for
|
||||
* variable length values and the comparator to sort the merged
|
||||
* bytes.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static final class MergeContext {
|
||||
private final Comparator<BytesRef> comp;
|
||||
private final BytesRef missingValue = new BytesRef();
|
||||
|
@ -169,10 +179,36 @@ public final class SortedBytesMergeUtils {
|
|||
return merger.currentOrd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of this interface consume the merged bytes with their
|
||||
* corresponding ordinal and byte offset. The offset is the byte offset in
|
||||
* target sorted source where the currently merged {@link BytesRef} instance
|
||||
* should be stored at.
|
||||
*/
|
||||
public static interface BytesRefConsumer {
|
||||
|
||||
/**
|
||||
* Consumes a single {@link BytesRef}. The provided {@link BytesRef}
|
||||
* instances are strictly increasing with respect to the used
|
||||
* {@link Comparator} used for merging
|
||||
*
|
||||
* @param ref
|
||||
* the {@link BytesRef} to consume
|
||||
* @param ord
|
||||
* the ordinal of the given {@link BytesRef} in the merge target
|
||||
* @param offset
|
||||
* the byte offset of the given {@link BytesRef} in the merge
|
||||
* target
|
||||
* @throws IOException
|
||||
* if an {@link IOException} occurs
|
||||
*/
|
||||
public void consume(BytesRef ref, int ord, long offset) throws IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple {@link BytesRefConsumer} that writes the merged {@link BytesRef}
|
||||
* instances sequentially to an {@link IndexOutput}.
|
||||
*/
|
||||
public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
|
||||
private final IndexOutput datOut;
|
||||
|
||||
|
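The hunk above introduces the BytesRefConsumer callback that receives each merged value together with its ordinal and byte offset. As an illustration of the contract, here is a consumer that simply collects the merged values in memory; the nested interface is restated locally (with the signature shown in the hunk) so the sketch stays self-contained, and it is not part of the commit.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.BytesRef;

// Mirrors the signature shown above; the real interface lives in SortedBytesMergeUtils.
interface BytesRefConsumer {
  void consume(BytesRef ref, int ord, long offset) throws IOException;
}

/** Illustrative consumer that collects merged values instead of writing them to an IndexOutput. */
final class CollectingBytesRefConsumer implements BytesRefConsumer {
  final List<BytesRef> values = new ArrayList<BytesRef>();

  @Override
  public void consume(BytesRef ref, int ord, long offset) throws IOException {
    // The merger may reuse its scratch BytesRef, so store a private copy;
    // ord is the value's position in the merged, de-duplicated order.
    values.add(BytesRef.deepCopyOf(ref));
  }
}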
@ -187,6 +223,14 @@ public final class SortedBytesMergeUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link RecordMerger} merges a list of {@link SortedSourceSlice} lazily by
|
||||
* consuming the sorted source records one by one and de-duplicates records
|
||||
* that are shared across slices. The algorithm is based on a lazy priority queue
|
||||
* that prevents reading merge sources into heap memory.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
private static final class RecordMerger {
|
||||
private final MergeQueue queue;
|
||||
private final SortedSourceSlice[] top;
|
||||
|
@ -231,6 +275,12 @@ public final class SortedBytesMergeUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link SortedSourceSlice} represents a single {@link SortedSource} merge candidate.
|
||||
* It encapsulates ordinal and pre-calculated target doc id to ordinal mappings.
|
||||
* This class also holds state private to the merge process.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static class SortedSourceSlice {
|
||||
final SortedSource source;
|
||||
final int readerIdx;
|
||||
|
|
|
@ -45,6 +45,9 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
*/
|
||||
public interface FieldCache {
|
||||
|
||||
/**
|
||||
* Placeholder indicating creation of this cache is currently in-progress.
|
||||
*/
|
||||
public static final class CreationPlaceholder {
|
||||
Object value;
|
||||
}
|
||||
|
|
|
@ -194,6 +194,9 @@ public abstract class FieldComparator<T> {
|
|||
* than the provided value. */
|
||||
public abstract int compareDocToValue(int doc, T value) throws IOException;
|
||||
|
||||
/**
|
||||
* Base FieldComparator class for numeric types
|
||||
*/
|
||||
public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
|
||||
protected final T missingValue;
|
||||
protected final String field;
|
||||
|
|
|
@ -33,6 +33,10 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
*/
|
||||
public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {
|
||||
|
||||
/**
|
||||
* Extension of ScoreDoc to also store the
|
||||
* {@link FieldComparator} slot.
|
||||
*/
|
||||
public static class Entry extends ScoreDoc {
|
||||
public int slot;
|
||||
|
||||
|
|
|
@ -398,12 +398,17 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
return scale_factor;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
/**
|
||||
* reuses compiled automata across different segments,
|
||||
* because they are independent of the index
|
||||
* @lucene.internal */
|
||||
public static interface LevenshteinAutomataAttribute extends Attribute {
|
||||
public List<CompiledAutomaton> automata();
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
/**
|
||||
* Stores compiled automata as a list (indexed by edit distance)
|
||||
* @lucene.internal */
|
||||
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
|
||||
private final List<CompiledAutomaton> automata = new ArrayList<CompiledAutomaton>();
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ public class IndexSearcher {
|
|||
// in the next release
|
||||
protected final IndexReaderContext readerContext;
|
||||
protected final List<AtomicReaderContext> leafContexts;
|
||||
// used with executor - each slice holds a set of leafs executed within one thread
|
||||
/** used with executor - each slice holds a set of leafs executed within one thread */
|
||||
protected final LeafSlice[] leafSlices;
|
||||
|
||||
// These are only used for multi-threaded search
|
||||
|
|
|
@ -32,7 +32,11 @@ import org.apache.lucene.util.BytesRefHash;
|
|||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
|
||||
/** @lucene.internal Only public to be accessible by spans package. */
|
||||
/**
|
||||
* Base rewrite method that translates each term into a query, and keeps
|
||||
* the scores as computed by the query.
|
||||
* <p>
|
||||
* @lucene.internal Only public to be accessible by spans package. */
|
||||
public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q> {
|
||||
|
||||
/** A rewrite method that first translates each term into
|
||||
|
|
|
@ -38,6 +38,9 @@ import org.apache.lucene.util.StringHelper;
|
|||
*/
|
||||
public class SortField {
|
||||
|
||||
/**
|
||||
* Specifies the type of the terms to be sorted, or special types such as CUSTOM
|
||||
*/
|
||||
public static enum Type {
|
||||
|
||||
/** Sort by document score (relevance). Sort values are Float and higher
|
||||
|
|
|
@ -225,6 +225,8 @@ public class TimeLimitingCollector extends Collector {
|
|||
}
|
||||
|
||||
/**
|
||||
* Thread used to timeout search requests.
|
||||
* Can be stopped completely with {@link TimerThread#stopTimer()}
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static final class TimerThread extends Thread {
|
||||
|
|
|
@ -33,8 +33,8 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
*/
|
||||
public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector {
|
||||
|
||||
// This is used in case topDocs() is called with illegal parameters, or there
|
||||
// simply aren't (enough) results.
|
||||
/** This is used in case topDocs() is called with illegal parameters, or there
|
||||
* simply aren't (enough) results. */
|
||||
protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN);
|
||||
|
||||
/**
|
||||
|
|
|
@ -436,6 +436,9 @@ public abstract class FSDirectory extends Directory {
|
|||
return chunkSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes output with {@link RandomAccessFile#write(byte[], int, int)}
|
||||
*/
|
||||
protected static class FSIndexOutput extends BufferedIndexOutput {
|
||||
private final FSDirectory parent;
|
||||
private final String name;
|
||||
|
|
|
@ -106,6 +106,9 @@ public class NIOFSDirectory extends FSDirectory {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads bytes with {@link FileChannel#read(ByteBuffer, long)}
|
||||
*/
|
||||
protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
|
||||
|
||||
private ByteBuffer byteBuf; // wraps the buffer for NIO
|
||||
|
|
|
@ -19,7 +19,9 @@ package org.apache.lucene.store;
|
|||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/** @lucene.internal */
|
||||
/**
|
||||
* Represents a file in RAM as a list of byte[] buffers.
|
||||
* @lucene.internal */
|
||||
public class RAMFile {
|
||||
protected ArrayList<byte[]> buffers = new ArrayList<byte[]>();
|
||||
long length;
|
||||
|
|
|
@ -85,8 +85,16 @@ public class SimpleFSDirectory extends FSDirectory {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads bytes with {@link RandomAccessFile#seek(long)} followed by
|
||||
* {@link RandomAccessFile#read(byte[], int, int)}.
|
||||
*/
|
||||
protected static class SimpleFSIndexInput extends BufferedIndexInput {
|
||||
|
||||
/**
|
||||
* Extension of RandomAccessFile that tracks if the file is
|
||||
* open.
|
||||
*/
|
||||
protected static class Descriptor extends RandomAccessFile {
|
||||
// remember if the file is open, so that we don't try to close it
|
||||
// more than once
|
||||
|
|
|
@@ -117,10 +117,13 @@ public final class ByteBlockPool {
  public byte[][] buffers = new byte[10][];

  int bufferUpto = -1;                        // Which buffer we are upto
  public int byteUpto = BYTE_BLOCK_SIZE;      // Where we are in head buffer
  /** Where we are in head buffer */
  public int byteUpto = BYTE_BLOCK_SIZE;

  public byte[] buffer;                       // Current head buffer
  public int byteOffset = -BYTE_BLOCK_SIZE;   // Current head offset
  /** Current head buffer */
  public byte[] buffer;
  /** Current head offset */
  public int byteOffset = -BYTE_BLOCK_SIZE;

  private final Allocator allocator;
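The fields above (head buffer, byteUpto, byteOffset) describe a pool that hands out space from a chain of fixed-size byte blocks, switching to a fresh block when the head fills up. A stripped-down sketch of that allocation pattern; this is only the idea, not Lucene's ByteBlockPool.

final class SimpleBytePool {
  static final int BLOCK_SIZE = 1 << 15;        // fixed-size blocks, like BYTE_BLOCK_SIZE

  byte[][] buffers = new byte[10][];
  int bufferUpto = -1;                          // index of the current head block
  int byteUpto = BLOCK_SIZE;                    // next free position inside the head block
  long byteOffset = -BLOCK_SIZE;                // global offset of the head block's first byte
  byte[] buffer;                                // the current head block

  /** Reserves length bytes and returns the global start offset of the reservation. */
  long alloc(int length) {
    if (length > BLOCK_SIZE) throw new IllegalArgumentException("larger than one block");
    if (byteUpto + length > BLOCK_SIZE) {       // head block is full: switch to a new one
      if (++bufferUpto == buffers.length) {
        buffers = java.util.Arrays.copyOf(buffers, buffers.length * 2);
      }
      buffer = buffers[bufferUpto] = new byte[BLOCK_SIZE];
      byteUpto = 0;
      byteOffset += BLOCK_SIZE;
    }
    long start = byteOffset + byteUpto;
    byteUpto += length;
    return start;
  }
}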
@@ -48,6 +48,11 @@ public class FuzzySet {

  public static final int FUZZY_SERIALIZATION_VERSION=1;

  /**
   * Result from {@link FuzzySet#contains(BytesRef)}:
   * can never return definitively YES (always MAYBE),
   * but can sometimes definitely return NO.
   */
  public enum ContainsResult {
    MAYBE, NO
  };
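The MAYBE/NO contract above is the classic Bloom-filter guarantee: matching bit patterns can collide, so membership is never certain, but a missing bit proves absence. A tiny illustrative filter with the same two-valued answer; the hash choice and sizes are arbitrary and are not FuzzySet's.

final class TinyBloomFilter {
  enum ContainsResult { MAYBE, NO }

  private final long[] bits = new long[1024];          // 65536 bits
  private static final int NUM_HASHES = 2;

  void add(String value) {
    for (int i = 0; i < NUM_HASHES; i++) {
      int bit = hash(value, i);
      bits[bit >>> 6] |= 1L << (bit & 63);
    }
  }

  ContainsResult contains(String value) {
    for (int i = 0; i < NUM_HASHES; i++) {
      int bit = hash(value, i);
      if ((bits[bit >>> 6] & (1L << (bit & 63))) == 0) {
        return ContainsResult.NO;                      // a clear bit proves absence
      }
    }
    return ContainsResult.MAYBE;                       // all bits set: possibly present
  }

  private int hash(String value, int seed) {
    return Math.floorMod(value.hashCode() * 31 + seed * 0x9E3779B9, bits.length * 64);
  }
}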
@ -158,7 +158,7 @@ public final class FST<T> {
|
|||
private final boolean packed;
|
||||
private PackedInts.Reader nodeRefToAddress;
|
||||
|
||||
// If arc has this label then that arc is final/accepted
|
||||
/** If arc has this label then that arc is final/accepted */
|
||||
public static final int END_LABEL = -1;
|
||||
|
||||
private boolean allowArrayArcs = true;
|
||||
|
@ -174,7 +174,7 @@ public final class FST<T> {
|
|||
// building an FST w/ willPackFST=true:
|
||||
int node;
|
||||
|
||||
// To node (ord or address):
|
||||
/** To node (ord or address) */
|
||||
public int target;
|
||||
|
||||
byte flags;
|
||||
|
@ -542,8 +542,8 @@ public final class FST<T> {
|
|||
return v;
|
||||
}
|
||||
|
||||
// returns true if the node at this address has any
|
||||
// outgoing arcs
|
||||
/** returns true if the node at this address has any
|
||||
* outgoing arcs */
|
||||
public static<T> boolean targetHasArcs(Arc<T> arc) {
|
||||
return arc.target > 0;
|
||||
}
|
||||
|
|
|
@@ -767,6 +767,19 @@ public final class Util {
    }
  }

  /** Just maps each UTF16 unit (char) to the ints in an
   *  IntsRef. */
  public static IntsRef toUTF16(CharSequence s, IntsRef scratch) {
    final int charLimit = s.length();
    scratch.offset = 0;
    scratch.length = charLimit;
    scratch.grow(charLimit);
    for (int idx = 0; idx < charLimit; idx++) {
      scratch.ints[idx] = (int) s.charAt(idx);
    }
    return scratch;
  }

  /** Decodes the Unicode codepoints from the provided
   *  CharSequence and places them in the provided scratch
   *  IntsRef, which must not be null, returning it. */
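A short usage sketch of the new helper above, tying back to the NormalizeCharMap change earlier in this commit: keys converted with toUTF16 line up one-to-one with the chars a CharFilter reads, which is what makes surrogate pairs matchable. The expected values in the asserts are derived from the method body shown above and from toUTF32 as it appears in the NormalizeCharMap hunk; treat this as a sketch, not library documentation.

import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Util;

public class ToUTF16Demo {
  public static void main(String[] args) {
    String fclef = new String(Character.toChars(0x1D122));   // non-BMP: two UTF-16 units

    IntsRef asUnits = Util.toUTF16(fclef, new IntsRef());
    // Two ints, one per char: the high and low surrogate.
    assert asUnits.length == 2;
    assert asUnits.ints[0] == 0xD834 && asUnits.ints[1] == 0xDD22;

    IntsRef asCodePoints = Util.toUTF32(fclef, new IntsRef());
    // One int: the single code point U+1D122.
    assert asCodePoints.length == 1;
    assert asCodePoints.ints[0] == 0x1D122;
  }
}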
@@ -734,7 +734,7 @@ public class PackedInts {
      }
      return new Packed64(in, valueCount, bitsPerValue);
    default:
      throw new AssertionError("Unknwown Writer format: " + format);
      throw new AssertionError("Unknown Writer format: " + format);
    }
  }
@ -20,12 +20,13 @@ package org.apache.lucene.search;
|
|||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
@ -36,7 +37,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
|
||||
public class TestSimilarityProvider extends LuceneTestCase {
|
||||
private Directory directory;
|
||||
private IndexReader reader;
|
||||
private DirectoryReader reader;
|
||||
private IndexSearcher searcher;
|
||||
|
||||
@Override
|
||||
|
@ -75,8 +76,9 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
|||
public void testBasics() throws Exception {
|
||||
// sanity check of norms writer
|
||||
// TODO: generalize
|
||||
byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
|
||||
byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray();
|
||||
AtomicReader slow = new SlowCompositeReaderWrapper(reader);
|
||||
byte fooNorms[] = (byte[]) slow.normValues("foo").getSource().getArray();
|
||||
byte barNorms[] = (byte[]) slow.normValues("bar").getSource().getArray();
|
||||
for (int i = 0; i < fooNorms.length; i++) {
|
||||
assertFalse(fooNorms[i] == barNorms[i]);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
package org.apache.lucene.util.junitcompat;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.JUnitCore;
|
||||
import org.junit.runner.Result;
|
||||
|
||||
public class TestLeaveFilesIfTestFails extends WithNestedTests {
|
||||
public TestLeaveFilesIfTestFails() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
public static class Nested1 extends WithNestedTests.AbstractNestedTest {
|
||||
static File file;
|
||||
public void testDummy() {
|
||||
file = _TestUtil.getTempDir("leftover");
|
||||
file.mkdirs();
|
||||
fail();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLeaveFilesIfTestFails() {
|
||||
Result r = JUnitCore.runClasses(Nested1.class);
|
||||
Assert.assertEquals(1, r.getFailureCount());
|
||||
Assert.assertTrue(Nested1.file.exists());
|
||||
Nested1.file.delete();
|
||||
}
|
||||
}
|
|
@ -44,6 +44,10 @@ public abstract class AbstractDistinctValuesCollector<GC extends AbstractDistinc
|
|||
public void setScorer(Scorer scorer) throws IOException {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returned by {@link AbstractDistinctValuesCollector#getGroups()},
|
||||
* representing the value and set of distinct values for the group.
|
||||
*/
|
||||
public abstract static class GroupCount<GROUP_VALUE_TYPE> {
|
||||
|
||||
public final GROUP_VALUE_TYPE groupValue;
|
||||
|
|
|
@ -17,7 +17,12 @@
|
|||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
/** @lucene.internal */
|
||||
import org.apache.lucene.search.FieldComparator; // javadocs
|
||||
|
||||
/**
|
||||
* Expert: representation of a group in {@link AbstractFirstPassGroupingCollector},
|
||||
* tracking the top doc and {@link FieldComparator} slot.
|
||||
* @lucene.internal */
|
||||
public class CollectedSearchGroup<T> extends SearchGroup<T> {
|
||||
int topDoc;
|
||||
int comparatorSlot;
|
||||
|
|
|
@ -90,6 +90,28 @@
|
|||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<property name="test-framework.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}.jar"/>
|
||||
<target name="check-test-framework-uptodate" unless="test-framework.uptodate">
|
||||
<module-uptodate name="test-framework" jarfile="${test-framework.jar}" property="test-framework.uptodate"/>
|
||||
</target>
|
||||
<target name="jar-test-framework" unless="test-framework.uptodate" depends="check-test-framework-uptodate">
|
||||
<ant dir="${common.dir}/test-framework" target="jar-core" inheritall="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="test-framework.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="test-framework-javadoc.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}-javadoc.jar"/>
|
||||
<target name="check-test-framework-javadocs-uptodate" unless="test-framework-javadocs.uptodate">
|
||||
<module-uptodate name="test-framework" jarfile="${test-framework-javadoc.jar}" property="test-framework-javadocs.uptodate"/>
|
||||
</target>
|
||||
<target name="javadocs-test-framework" unless="test-framework-javadocs.uptodate" depends="check-test-framework-javadocs-uptodate">
|
||||
<ant dir="${common.dir}/test-framework" target="javadocs" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="test-framework-javadocs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="queryparser.jar" value="${common.dir}/build/queryparser/lucene-queryparser-${version}.jar"/>
|
||||
<target name="check-queryparser-uptodate" unless="queryparser.uptodate">
|
||||
<module-uptodate name="queryparser" jarfile="${queryparser.jar}" property="queryparser.uptodate"/>
|
||||
|
|
|
@ -24,6 +24,9 @@ import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Function that returns a constant double value for every document.
|
||||
*/
|
||||
public class DoubleConstValueSource extends ConstNumberSource {
|
||||
final double constant;
|
||||
private final float fv;
|
||||
|
|
|
@ -28,7 +28,13 @@ import org.apache.lucene.util.BytesRef;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/** @lucene.internal */
|
||||
/**
|
||||
* Function that returns {@link TFIDFSimilarity #idf(long, long)}
|
||||
* for every document.
|
||||
* <p>
|
||||
* Note that the configured Similarity for the field must be
|
||||
* a subclass of {@link TFIDFSimilarity}
|
||||
* @lucene.internal */
|
||||
public class IDFValueSource extends DocFreqValueSource {
|
||||
public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
super(field, val, indexedField, indexedBytes);
|
||||
|
|
|
@ -30,6 +30,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* Depending on the boolean value of the <code>ifSource</code> function,
|
||||
* returns the value of the <code>trueSource</code> or <code>falseSource</code> function.
|
||||
*/
|
||||
public class IfFunction extends BoolFunction {
|
||||
private final ValueSource ifSource;
|
||||
private final ValueSource trueSource;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.queries.function.valuesource;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader; // javadocs
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -24,6 +25,11 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Returns the value of {@link IndexReader#maxDoc()}
|
||||
* for every document. This is the number of documents
|
||||
* including deletions.
|
||||
*/
|
||||
public class MaxDocValueSource extends ValueSource {
|
||||
public String name() {
|
||||
return "maxdoc";
|
||||
|
|
|
@ -28,6 +28,13 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Function that returns {@link TFIDFSimilarity#decodeNormValue(byte)}
|
||||
* for every document.
|
||||
* <p>
|
||||
* Note that the configured Similarity for the field must be
|
||||
* a subclass of {@link TFIDFSimilarity}
|
||||
* @lucene.internal */
|
||||
public class NormValueSource extends ValueSource {
|
||||
protected final String field;
|
||||
public NormValueSource(String field) {
|
||||
|
|
|
@ -30,7 +30,10 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments).
|
||||
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens.
|
||||
* (sum of term freqs across all documents, across all terms).
|
||||
* Returns -1 if frequencies were omitted for the field, or if
|
||||
* the codec doesn't support this statistic.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class SumTotalTermFreqValueSource extends ValueSource {
|
||||
|
|
|
@ -28,6 +28,13 @@ import org.apache.lucene.util.BytesRef;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Function that returns {@link TFIDFSimilarity#tf(int)}
|
||||
* for every document.
|
||||
* <p>
|
||||
* Note that the configured Similarity for the field must be
|
||||
* a subclass of {@link TFIDFSimilarity}
|
||||
* @lucene.internal */
|
||||
public class TFValueSource extends TermFreqValueSource {
|
||||
public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
super(field, val, indexedField, indexedBytes);
|
||||
|
|
|
@ -26,6 +26,13 @@ import org.apache.lucene.util.BytesRef;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Function that returns {@link DocsEnum#freq()} for the
|
||||
* supplied term in every document.
|
||||
* <p>
|
||||
* If the term does not exist in the document, returns 0.
|
||||
* If frequencies are omitted, returns 1.
|
||||
*/
|
||||
public class TermFreqValueSource extends DocFreqValueSource {
|
||||
public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
super(field, val, indexedField, indexedBytes);
|
||||
|
|
|
@ -28,7 +28,10 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments).
|
||||
* <code>TotalTermFreqValueSource</code> returns the total term freq
|
||||
* (sum of term freqs across all documents).
|
||||
* Returns -1 if frequencies were omitted for the field, or if
|
||||
* the codec doesn't support this statistic.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class TotalTermFreqValueSource extends ValueSource {
|
||||
|
|
|
@ -29,7 +29,9 @@ import org.apache.lucene.util._TestUtil;
|
|||
// a MockRemovesTokensTF, ideally subclassing FilteringTF
|
||||
// (in modules/analysis)
|
||||
|
||||
// Randomly injects holes:
|
||||
/**
|
||||
* Randomly injects holes (similar to what a stopfilter would do)
|
||||
*/
|
||||
public final class MockHoleInjectingTokenFilter extends TokenFilter {
|
||||
|
||||
private final long randomSeed;
|
||||
|
|
|
@ -27,8 +27,9 @@ import java.io.Reader;
|
|||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* Wraps a whitespace tokenizer with a filter that sets
|
||||
* the first token, and odd tokens to posinc=1, and all others
|
||||
* to 0, encoding the position as pos: XXX in the payload.
|
||||
**/
|
||||
public final class MockPayloadAnalyzer extends Analyzer {
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PostingsReaderBase;
|
|||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.TermsIndexReaderBase;
|
||||
import org.apache.lucene.codecs.TermsIndexWriterBase;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
|
@ -39,6 +40,10 @@ import org.apache.lucene.util.BytesRef;
|
|||
// TODO: we could make separate base class that can wrapp
|
||||
// any PostingsBaseFormat and make it ord-able...
|
||||
|
||||
/**
|
||||
* Customized version of {@link Lucene40Codec} that uses
|
||||
* {@link FixedGapTermsIndexWriter}.
|
||||
*/
|
||||
public class Lucene40WithOrds extends PostingsFormat {
|
||||
|
||||
public Lucene40WithOrds() {
|
||||
|
|
|
@ -72,6 +72,9 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat {
|
|||
return new MockIntFactory(blockSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes blocks as vInts of a fixed block size.
|
||||
*/
|
||||
public static class MockIntFactory extends IntStreamFactory {
|
||||
private final int blockSize;
|
||||
|
||||
|
|
|
@@ -70,6 +70,10 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat {
    return getName() + "(baseBlockSize="+ baseBlockSize + ")";
  }

  /**
   * If the first value is <= 3, writes baseBlockSize vInts at once,
   * otherwise writes 2*baseBlockSize vInts.
   */
  public static class MockIntFactory extends IntStreamFactory {

    private final int baseBlockSize;
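The javadoc above describes a toy variable-length block scheme: the first value of a block decides whether the block holds baseBlockSize or 2*baseBlockSize vInt-encoded integers. A hedged sketch of a writer following that rule, using plain java.io rather than the Lucene IntIndexOutput API:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

final class VariableBlockVIntWriter {
  private final int baseBlockSize;
  private final ByteArrayOutputStream out = new ByteArrayOutputStream();

  VariableBlockVIntWriter(int baseBlockSize) {
    this.baseBlockSize = baseBlockSize;
  }

  /** Writes one block; the first value picks the block length, as in the javadoc above. */
  void writeBlock(int[] values, int start) throws IOException {
    int blockSize = values[start] <= 3 ? baseBlockSize : 2 * baseBlockSize;
    for (int i = 0; i < blockSize; i++) {
      writeVInt(values[start + i]);
    }
  }

  /** Standard vInt: 7 data bits per byte, high bit set on all but the last byte. */
  private void writeVInt(int v) {
    while ((v & ~0x7F) != 0) {
      out.write((v & 0x7F) | 0x80);
      v >>>= 7;
    }
    out.write(v);
  }

  byte[] toBytes() {
    return out.toByteArray();
  }
}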
@ -25,7 +25,10 @@ import org.apache.lucene.codecs.sep.IntStreamFactory;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
/** @lucene.experimental */
|
||||
/**
|
||||
* Encodes ints directly as vInts with {@link MockSingleIntIndexOutput}
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class MockSingleIntFactory extends IntStreamFactory {
|
||||
@Override
|
||||
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
|
||||
/** Reads IndexInputs written with {@link
|
||||
* MockSingleIntIndexOutput}. NOTE: this class is just for
|
||||
* demonstration puprposes (it is a very slow way to read a
|
||||
* demonstration purposes (it is a very slow way to read a
|
||||
* block of ints).
|
||||
*
|
||||
* @lucene.experimental
|
||||
|
@ -54,6 +54,9 @@ public class MockSingleIntIndexInput extends IntIndexInput {
|
|||
in.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Just reads a vInt directly from the file.
|
||||
*/
|
||||
public static class Reader extends IntIndexInput.Reader {
|
||||
// clone:
|
||||
private final IndexInput in;
|
||||
|
|
|
@ -68,7 +68,7 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
|
|||
return info.sizeInBytes();
|
||||
}
|
||||
|
||||
public static enum Drink {
|
||||
private static enum Drink {
|
||||
|
||||
Beer(15), Wine(17), Champagne(21), WhiteRussian(22), SingleMalt(30);
|
||||
|
||||
|
@ -77,11 +77,6 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
|
|||
Drink(long drunkFactor) {
|
||||
this.drunkFactor = drunkFactor;
|
||||
}
|
||||
|
||||
public long drunk() {
|
||||
return drunkFactor;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -23,6 +23,10 @@ import java.util.Iterator;
|
|||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A {@link FilterAtomicReader} that exposes only a subset
|
||||
* of fields from the underlying wrapped reader.
|
||||
*/
|
||||
public final class FieldFilterAtomicReader extends FilterAtomicReader {
|
||||
|
||||
private final Set<String> fields;
|
||||
|
|
|
@ -42,9 +42,15 @@ import org.apache.lucene.util._TestUtil;
|
|||
// - doc blocks? so we can test joins/grouping...
|
||||
// - controlled consistency (NRTMgr)
|
||||
|
||||
/**
|
||||
* Base test class for simulating distributed search across multiple shards.
|
||||
*/
|
||||
public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
||||
|
||||
// TODO: maybe SLM should throw this instead of returning null...
|
||||
/**
|
||||
* Thrown when the lease for a searcher has expired.
|
||||
*/
|
||||
public static class SearcherExpiredException extends RuntimeException {
|
||||
public SearcherExpiredException(String message) {
|
||||
super(message);
|
||||
|
@ -604,6 +610,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An IndexSearcher and associated version (lease)
|
||||
*/
|
||||
protected static class SearcherAndVersion {
|
||||
public final IndexSearcher searcher;
|
||||
public final long version;
|
||||
|
|
|
@ -146,6 +146,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
|
|||
preventDoubleWrite = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enum for controlling hard disk throttling.
|
||||
* Set via {@link MockDirectoryWrapper #setThrottling(Throttling)}
|
||||
* <p>
|
||||
* WARNING: can make tests very slow.
|
||||
*/
|
||||
public static enum Throttling {
|
||||
/** always emulate a slow hard disk. could be very slow! */
|
||||
ALWAYS,
|
||||
|
|
|
@ -24,13 +24,17 @@ import java.io.*;
|
|||
*/
|
||||
final class CloseableFile implements Closeable {
|
||||
private final File file;
|
||||
private final TestRuleMarkFailure failureMarker;
|
||||
|
||||
public CloseableFile(File file) {
|
||||
public CloseableFile(File file, TestRuleMarkFailure failureMarker) {
|
||||
this.file = file;
|
||||
this.failureMarker = failureMarker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
// only if there were no other test failures.
|
||||
if (failureMarker.wasSuccessful()) {
|
||||
if (file.exists()) {
|
||||
try {
|
||||
_TestUtil.rmDir(file);
|
||||
|
@ -45,4 +49,5 @@ final class CloseableFile implements Closeable {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
/**
|
||||
* Converts numbers to english strings for testing.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class English {
|
||||
|
|
|
@ -26,6 +26,9 @@ package org.apache.lucene.util;
|
|||
* @lucene.internal */
|
||||
public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {
|
||||
|
||||
/**
|
||||
* Implement to reset an instance
|
||||
*/
|
||||
public static interface Resettable {
|
||||
public void reset();
|
||||
}
|
||||
|
|
|
@ -1,10 +1,5 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.junit.rules.TestRule;
|
||||
import org.junit.runner.Description;
|
||||
import org.junit.runners.model.Statement;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@@ -22,6 +17,30 @@ import org.junit.runners.model.Statement;
* limitations under the License.
*/

import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.FieldCacheSanityChecker; // javadocs
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;

/**
* This rule will fail the test if it has insane field caches.
* <p>
* calling assertSaneFieldCaches here isn't as useful as having test
* classes call it directly from the scope where the index readers
* are used, because they could be gc'ed just before this tearDown
* method is called.
* <p>
* But it's better than nothing.
* <p>
* If you are testing functionality that you know for a fact
* "violates" FieldCache sanity, then you should either explicitly
* call purgeFieldCache at the end of your test method, or refactor
* your Test class so that the inconsistent FieldCache usages are
* isolated in distinct test methods
*
* @see FieldCacheSanityChecker
*/
public class TestRuleFieldCacheSanity implements TestRule {

@Override
@@ -33,18 +52,6 @@ public class TestRuleFieldCacheSanity implements TestRule {

Throwable problem = null;
try {
// calling assertSaneFieldCaches here isn't as useful as having test
// classes call it directly from the scope where the index readers
// are used, because they could be gc'ed just before this tearDown
// method is called.
//
// But it's better then nothing.
//
// If you are testing functionality that you know for a fact
// "violates" FieldCache sanity, then you should either explicitly
// call purgeFieldCache at the end of your test method, or refactor
// your Test class so that the inconsistent FieldCache usages are
// isolated in distinct test methods
LuceneTestCase.assertSaneFieldCaches(d.getDisplayName());
} catch (Throwable t) {
problem = t;
@@ -21,6 +21,10 @@ import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;

/**
* Stores the suite name so you can retrieve it
* from {@link #getTestClass()}
*/
public class TestRuleStoreClassName implements TestRule {
private volatile Description description;

@@ -94,7 +94,7 @@ public class _TestUtil {
try {
File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR);
f.delete();
LuceneTestCase.closeAfterSuite(new CloseableFile(f));
LuceneTestCase.closeAfterSuite(new CloseableFile(f, LuceneTestCase.suiteFailureMarker));
return f;
} catch (IOException e) {
throw new RuntimeException(e);

@@ -136,7 +136,7 @@ public class _TestUtil {
rmDir(destDir);

destDir.mkdir();
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir));
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker));

while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
@@ -127,6 +127,13 @@ public class AutomatonTestUtil {
return code;
}

/**
* Lets you retrieve random strings accepted
* by an Automaton.
* <p>
* Once created, call {@link #getRandomAcceptedString(Random)}
* to get a new string (in UTF-32 codepoints).
*/
public static class RandomAcceptedStrings {

private final Map<Transition,Boolean> leadsToAccept;
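A minimal usage sketch (not part of this patch), assuming the RandomAcceptedStrings(Automaton) constructor together with Lucene's RegExp and UnicodeUtil helpers:

    import java.util.Random;

    import org.apache.lucene.util.UnicodeUtil;
    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.AutomatonTestUtil;
    import org.apache.lucene.util.automaton.RegExp;

    public class RandomAcceptedStringsSketch {
      public static void main(String[] args) {
        Automaton a = new RegExp("foo(bar|baz)*").toAutomaton();
        AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(a);
        // Each call returns the UTF-32 code points of a string the automaton accepts.
        int[] codePoints = ras.getRandomAcceptedString(new Random(42));
        System.out.println(UnicodeUtil.newString(codePoints, 0, codePoints.length));
      }
    }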
@@ -26,6 +26,14 @@ $Id$

================== 4.0.0 ==================

Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.5.0
Velocity 1.6.4 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.3.6

Upgrading from Solr 4.0.0-BETA
----------------------

@@ -63,6 +71,27 @@ Bug Fixes
* SOLR-3649: Fixed bug in JavabinLoader that caused deleteById(List<String> ids)
  to not work in SolrJ (siren)

* SOLR-3730: Rollback is not implemented quite right and can cause corner case fails in
  SolrCloud tests. (rmuir, Mark Miller)

* SOLR-2981: Fixed StatsComponent to no longer return duplicated information
  when requesting multiple stats.facet fields.
  (Roman Kliewer via hossman)

Other Changes
----------------------

* SOLR-3690: Fixed binary release packages to include dependencies needed for
  the solr-test-framework (hossman)

* SOLR-2857: The /update/json and /update/csv URLs were restored to aid
  in the migration of existing clients. (yonik)

* SOLR-3691: SimplePostTool: Mode for crawling/posting web pages
  See http://wiki.apache.org/solr/ExtractingRequestHandler for examples (janhoy)

* SOLR-3707: Upgrade Solr to Tika 1.2 (janhoy)

================== 4.0.0-BETA ===================

@@ -271,7 +300,6 @@ Other Changes
  Also, the configuration itself can be passed using the "dataConfig" parameter rather than
  using a file (this previously worked in debug mode only). When configuration errors are
  encountered, the error message is returned in XML format. (James Dyer)

* SOLR-3439: Make SolrCell easier to use out of the box. Also improves "/browse" to display
  rich-text documents correctly, along with facets for author and content_type.
  With the new "content" field, highlighting of body is supported. See also SOLR-3672 for
@@ -310,12 +310,11 @@ Copyright 2004 Sun Microsystems, Inc. (Rome JAR)

Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)

Copyright (C) 1999-2007 Shigeru Chiba. All Rights Reserved.
(Javassist, MPL licensed: http://www.csg.ci.i.u-tokyo.ac.jp/~chiba/javassist/)

Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis)

Scannotation (C) Bill Burke
Copyright 2012 Kohei Taketa juniversalchardet (http://code.google.com/p/juniversalchardet/)

Lasse Collin and others, XZ for Java (http://tukaani.org/xz/java.html)

=========================================================================
== Language Detection Notices ==
@@ -386,8 +386,9 @@
<tarfileset dir="."
prefix="${fullnamever}"
includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/**
client/README.txt client/ruby/solr-ruby/** contrib/**/lib/**
contrib/**/README.txt licenses/**"
client/README.txt client/ruby/solr-ruby/**
contrib/**/lib/** contrib/**/README.txt
licenses/**"
excludes="lib/README.committers.txt **/data/ **/logs/*
**/classes/ **/*.sh **/ivy.xml **/build.xml
**/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml
@@ -401,7 +402,9 @@
includes="example/**/*.sh example/**/bin/" />
<tarfileset dir="."
prefix="${fullnamever}"
includes="dist/*.jar dist/*.war dist/solrj-lib/*"
includes="dist/*.jar dist/*.war
dist/solrj-lib/*
dist/test-framework/**"
excludes="**/*.tgz **/*.zip **/*.md5 **/*src*.jar **/*docs*.jar **/*.sha1" />
<tarfileset dir="${dest}/docs"
prefix="${fullnamever}/docs" />
@@ -193,7 +193,7 @@
<property name="lucenedocs" location="${common.dir}/build/docs"/>

<!-- dependency to ensure all lucene javadocs are present -->
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial"/>
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial,javadocs-test-framework"/>

<!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs">
@@ -20,36 +20,36 @@
<info organisation="org.apache.solr" module="extraction"/>
<dependencies>
<!-- Tika JARs -->
<dependency org="org.apache.tika" name="tika-core" rev="1.1" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.1" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.1/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<dependency org="org.apache.tika" name="tika-core" rev="1.2" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.2" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.2/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<!-- When upgrading Tika, upgrade dependencies versions and add any new ones
(except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) -->
<dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.1" transitive="false"/>
<dependency org="org.gagravarr" name="vorbis-java-core" rev="0.1" transitive="false"/>
<dependency org="edu.ucar" name="netcdf" rev="4.2-min" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.3" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.4.1" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.7.0" transitive="false"/>
<dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8" transitive="false"/>
<dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.3.0" transitive="false"/>
<dependency org="dom4j" name="dom4j" rev="1.6.1" transitive="false"/>
<dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-beta-5" transitive="false"/>
<dependency org="net.sf.scannotation" name="scannotation" rev="1.0.2" transitive="false"/>
<dependency org="javassist" name="javassist" rev="3.6.0.GA" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-RC-1" transitive="false"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.4.0-beta-1" transitive="false"/>
<dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" transitive="false"/>
<dependency org="rome" name="rome" rev="0.9" transitive="false"/>
<dependency org="jdom" name="jdom" rev="1.0" transitive="false"/>
<dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" transitive="false"/>
<dependency org="org.tukaani" name="xz" rev="1.0" transitive="false"/>
<!-- Other ExtractingRequestHandler dependencies -->
<dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
<dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/>
@@ -64,8 +64,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent",
"literal.id", "one",

@@ -81,6 +80,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor",
"fmap.language", "extractedLanguage",
"literal.id", "two",
"uprefix", "ignored_",
"fmap.content", "extractedContent",
"fmap.Last-Modified", "extractedDate"
);

@@ -136,6 +136,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Author", "extractedAuthor",
"literal.id", "three",
"uprefix", "ignored_",
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Last-Modified", "extractedDate"

@@ -206,6 +207,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent",
"literal.id", "one",
"uprefix", "ignored_",
"fmap.language", "extractedLanguage",
"literal.extractionLiteralMV", "one",
"literal.extractionLiteralMV", "two",

@@ -374,9 +376,8 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Author", "extractedAuthor",
"uprefix", "ignored_",
"fmap.content", "wdf_nocase",
"literal.id", "one",
"fmap.Last-Modified", "extractedDate");

@@ -404,8 +405,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/password-is-solrcell.docx", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Author", "extractedAuthor",
"fmap.content", "wdf_nocase",
"literal.id", "one",

@@ -462,8 +462,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");

// Here the literal value should override the Tika-parsed title:

@@ -478,8 +477,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");

// Here we mimic the old behaviour where literals are added, not overridden

@@ -498,8 +496,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");

assertU(commit());
@@ -125,6 +125,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
cacheValue.refCnt--;
if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) {
log.info("Closing directory:" + cacheValue.path);
directory.close();
byDirectoryCache.remove(directory);
byPathCache.remove(cacheValue.path);

@@ -194,6 +195,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {

byDirectoryCache.put(directory, newCacheValue);
byPathCache.put(fullPath, newCacheValue);
log.info("return new directory for " + fullPath + " forceNew:" + forceNew);
} else {
cacheValue.refCnt++;
}
@@ -1554,7 +1554,7 @@ public final class SolrCore implements SolrInfoMBean {
} catch (Throwable e) {
// do not allow decref() operations to fail since they are typically called in finally blocks
// and throwing another exception would be very unexpected.
SolrException.log(log, "Error closing searcher:", e);
SolrException.log(log, "Error closing searcher:" + this, e);
}
}
};
@@ -29,7 +29,7 @@ public class CSVRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) {
super.init(args);
setAssumeContentType("application/csv");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
// log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
}

//////////////////////// SolrInfoMBeans methods //////////////////////

@@ -29,7 +29,7 @@ public class JsonUpdateRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) {
super.init(args);
setAssumeContentType("application/json");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
// log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
}

//////////////////////// SolrInfoMBeans methods //////////////////////
@@ -384,7 +384,7 @@ public class SnapPuller {
// may be closed
core.getDirectoryFactory().doneWithDirectory(oldDirectory);
}
doCommit();
doCommit(isFullCopyNeeded);
}

replicationStartTime = 0;

@@ -533,11 +533,11 @@ public class SnapPuller {
return sb;
}

private void doCommit() throws IOException {
private void doCommit(boolean isFullCopyNeeded) throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams());
// reboot the writer on the new index and get a new searcher
solrCore.getUpdateHandler().newIndexWriter(true);
solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded);

try {
// first try to open an NRT searcher so that the new
@@ -182,8 +182,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
nl2.add(e2.getKey(), e2.getValue().getStatsValues());
}
res.add(FACETS, nl);
}
res.add(FACETS, nl);
return res;
}

@@ -74,8 +74,7 @@ public final class DefaultSolrCoreState extends SolrCoreState {
}

if (indexWriter == null) {
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, false);
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", false);
}
if (refCntWriter == null) {
refCntWriter = new RefCounted<IndexWriter>(indexWriter) {

@@ -113,15 +112,25 @@ public final class DefaultSolrCoreState extends SolrCoreState {

try {
if (indexWriter != null) {
if (!rollback) {
try {
log.info("Closing old IndexWriter... core=" + coreName);
indexWriter.close();
} catch (Throwable t) {
SolrException.log(log, "Error closing old IndexWriter. core=" + coreName, t);
SolrException.log(log, "Error closing old IndexWriter. core="
+ coreName, t);
}
} else {
try {
log.info("Rollback old IndexWriter... core=" + coreName);
indexWriter.rollback();
} catch (Throwable t) {
SolrException.log(log, "Error rolling back old IndexWriter. core="
+ coreName, t);
}
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, true);
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", true);
log.info("New IndexWriter is ready to be used.");
// we need to null this so it picks up the new writer next get call
refCntWriter = null;

@@ -174,14 +183,12 @@ public final class DefaultSolrCoreState extends SolrCoreState {

@Override
public synchronized void rollbackIndexWriter(SolrCore core) throws IOException {
indexWriter.rollback();
newIndexWriter(core, true);
}

protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name,
boolean removeAllExisting, boolean forceNewDirectory) throws IOException {
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, boolean forceNewDirectory) throws IOException {
return new SolrIndexWriter(name, core.getNewIndexDir(),
core.getDirectoryFactory(), removeAllExisting, core.getSchema(),
core.getDirectoryFactory(), false, core.getSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), forceNewDirectory);
}

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.update;

import org.apache.solr.common.util.FastOutputStream;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

/** @lucene.internal */
public class MemOutputStream extends FastOutputStream {
public List<byte[]> buffers = new LinkedList<byte[]>();
public MemOutputStream(byte[] tempBuffer) {
super(null, tempBuffer, 0);
}

@Override
public void flush(byte[] arr, int offset, int len) throws IOException {
if (arr == buf && offset==0 && len==buf.length) {
buffers.add(buf); // steal the buffer
buf = new byte[8192];
} else if (len > 0) {
byte[] newBuf = new byte[len];
System.arraycopy(arr, offset, newBuf, 0, len);
buffers.add(newBuf);
}
}

public void writeAll(FastOutputStream fos) throws IOException {
for (byte[] buffer : buffers) {
fos.write(buffer);
}
if (pos > 0) {
fos.write(buf, 0, pos);
}
}
}
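A usage sketch (not part of this patch): buffer writes in memory first, then replay them into a real stream in one pass. The FastOutputStream.wrap factory is assumed here; any FastOutputStream sink would do.

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.apache.solr.common.util.FastOutputStream;
    import org.apache.solr.update.MemOutputStream;

    public class MemOutputStreamSketch {
      public static void main(String[] args) throws IOException {
        // Collect writes in memory; full temp buffers are "stolen" into the buffers list.
        MemOutputStream mem = new MemOutputStream(new byte[8192]);
        mem.write("hello, world".getBytes("UTF-8"));

        // Replay everything into the real sink.
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        FastOutputStream out = FastOutputStream.wrap(sink); // assumed convenience factory
        mem.writeAll(out);
        out.flush();
        System.out.println(sink.size() + " bytes copied");
      }
    }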
@@ -141,6 +141,8 @@ public class SolrIndexWriter extends IndexWriter {
super.rollback();
} finally {
isClosed = true;
directoryFactory.release(getDirectory());
numCloses.incrementAndGet();
}
}

@@ -775,31 +775,3 @@ class ChannelFastInputStream extends FastInputStream {
}


class MemOutputStream extends FastOutputStream {
public List<byte[]> buffers = new LinkedList<byte[]>();
public MemOutputStream(byte[] tempBuffer) {
super(null, tempBuffer, 0);
}

@Override
public void flush(byte[] arr, int offset, int len) throws IOException {
if (arr == buf && offset==0 && len==buf.length) {
buffers.add(buf); // steal the buffer
buf = new byte[8192];
} else if (len > 0) {
byte[] newBuf = new byte[len];
System.arraycopy(arr, offset, newBuf, 0, len);
buffers.add(newBuf);
}
}

public void writeAll(FastOutputStream fos) throws IOException {
for (byte[] buffer : buffers) {
fos.write(buffer);
}
if (pos > 0) {
fos.write(buf, 0, pos);
}
}
}

@@ -28,7 +28,7 @@ public class FastWriter extends Writer {
// it won't cause double buffering.
private static final int BUFSIZE = 8192;
protected final Writer sink;
protected final char[] buf;
protected char[] buf;
protected int pos;

public FastWriter(Writer w) {
@@ -69,42 +69,64 @@ public class FastWriter extends Writer {
}

@Override
public void write(char cbuf[], int off, int len) throws IOException {
public void write(char arr[], int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos;
if (len < space) {
System.arraycopy(cbuf, off, buf, pos, len);

if (len <= space) {
System.arraycopy(arr, off, buf, pos, len);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
System.arraycopy(cbuf, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = len-space;
System.arraycopy(cbuf, off+space, buf, 0, pos);
} else {
return;
} else if (len > buf.length) {
if (pos>0) {
flush(buf,0,pos); // flush
pos=0;
}
// don't buffer, just write to sink
flush(cbuf, off, len);
flush(arr, off, len);
return;
}

// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.

System.arraycopy(arr, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
}
}

@Override
public void write(String str, int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos;
if (len < space) {

if (len <= space) {
str.getChars(off, off+len, buf, pos);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
str.getChars(off+space, off+len, buf, 0);
pos = len-space;
} else {
return;
} else if (len > buf.length) {
if (pos>0) {
flush(buf,0,pos); // flush
pos=0;
}
// don't buffer, just write to sink
flush(str, off, len);
return;
}

// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.

str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
}
}

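For context (not part of this patch): FastWriter is a drop-in Writer that batches small writes and hands oversized ones straight to the sink, exactly as the comments above describe. A minimal sketch of wrapping an arbitrary Writer; the package name is assumed:

    import java.io.IOException;
    import java.io.StringWriter;

    import org.apache.solr.common.util.FastWriter; // package assumed

    public class FastWriterSketch {
      public static void main(String[] args) throws IOException {
        StringWriter sink = new StringWriter();
        FastWriter out = new FastWriter(sink);
        out.write("small strings are buffered; ");
        out.write(new char[8192 * 2]); // larger than the buffer: flushed, then passed straight through
        out.flush();                   // push whatever is still buffered to the sink
        System.out.println(sink.getBuffer().length() + " chars reached the sink");
      }
    }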
@@ -0,0 +1,49 @@
<html>
<head>
<title>Welcome to Solr</title>
</head>
<body>
<p>
Here is some text
</p>
<div>Here is some text in a div</div>
<div>This has a <a href="http://www.apache.org">link</a>.</div>
<a href="#news">News</a>
<ul class="minitoc">
<li>
<a href="#03+October+2008+-+Solr+Logo+Contest">03 October 2008 - Solr Logo Contest</a>
</li>
<li>
<a href="#15+September+2008+-+Solr+1.3.0+Available">15 September 2008 - Solr 1.3.0 Available</a>
</li>
<li>
<a href="#28+August+2008+-+Lucene%2FSolr+at+ApacheCon+New+Orleans">28 August 2008 - Lucene/Solr at ApacheCon New Orleans</a>
</li>
<li>
<a href="#03+September+2007+-+Lucene+at+ApacheCon+Atlanta">03 September 2007 - Lucene at ApacheCon Atlanta</a>
</li>
<li>
<a href="#06+June+2007%3A+Release+1.2+available">06 June 2007: Release 1.2 available</a>
</li>
<li>
<a href="#17+January+2007%3A+Solr+graduates+from+Incubator">17 January 2007: Solr graduates from Incubator</a>
</li>
<li>
<a href="#22+December+2006%3A+Release+1.1.0+available">22 December 2006: Release 1.1.0 available</a>
</li>
<li>
<a href="#15+August+2006%3A+Solr+at+ApacheCon+US">15 August 2006: Solr at ApacheCon US</a>
</li>
<li>
<a href="#21+April+2006%3A+Solr+at+ApacheCon">21 April 2006: Solr at ApacheCon</a>
</li>
<li>
<a href="#21+February+2006%3A+nightly+builds">21 February 2006: nightly builds</a>
</li>
<li>
<a href="#17+January+2006%3A+Solr+Joins+Apache+Incubator">17 January 2006: Solr Joins Apache Incubator</a>
</li>
</ul>

</body>
</html>
@@ -0,0 +1,3 @@
Example text document

This is a simple example for a plain text document, indexed to Solr
@@ -54,7 +54,7 @@
-->
<maxBufferedDocs>10</maxBufferedDocs>
<mergePolicy class="org.apache.lucene.index.LogDocMergePolicy"/>
<lockType>single</lockType>
<lockType>native</lockType>
<unlockOnStartup>true</unlockOnStartup>
</indexConfig>

@@ -228,32 +228,35 @@ public class StatsComponentTest extends AbstractSolrTestCase {
}

public void doTestFacetStatisticsResult(String f) throws Exception {
assertU(adoc("id", "1", f, "10", "active_s", "true"));
assertU(adoc("id", "2", f, "20", "active_s", "true"));
assertU(adoc("id", "3", f, "30", "active_s", "false"));
assertU(adoc("id", "4", f, "40", "active_s", "false"));
assertU(adoc("id", "1", f, "10", "active_s", "true", "other_s", "foo"));
assertU(adoc("id", "2", f, "20", "active_s", "true", "other_s", "bar"));
assertU(adoc("id", "3", f, "30", "active_s", "false", "other_s", "foo"));
assertU(adoc("id", "4", f, "40", "active_s", "false", "other_s", "foo"));
assertU(commit());

assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","indent","true")
, "//lst[@name='true']/double[@name='min'][.='10.0']"
, "//lst[@name='true']/double[@name='max'][.='20.0']"
, "//lst[@name='true']/double[@name='sum'][.='30.0']"
, "//lst[@name='true']/long[@name='count'][.='2']"
, "//lst[@name='true']/long[@name='missing'][.='0']"
, "//lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, "//lst[@name='true']/double[@name='mean'][.='15.0']"
, "//lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
final String pre = "//lst[@name='stats_fields']/lst[@name='"+f+"']/lst[@name='facets']/lst[@name='active_s']";

assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","stats.facet","other_s","indent","true")
, "*[count("+pre+")=1]"
, pre+"/lst[@name='true']/double[@name='min'][.='10.0']"
, pre+"/lst[@name='true']/double[@name='max'][.='20.0']"
, pre+"/lst[@name='true']/double[@name='sum'][.='30.0']"
, pre+"/lst[@name='true']/long[@name='count'][.='2']"
, pre+"/lst[@name='true']/long[@name='missing'][.='0']"
, pre+"/lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, pre+"/lst[@name='true']/double[@name='mean'][.='15.0']"
, pre+"/lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
);

assertQ("test value for active_s=false", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s")
, "//lst[@name='false']/double[@name='min'][.='30.0']"
, "//lst[@name='false']/double[@name='max'][.='40.0']"
, "//lst[@name='false']/double[@name='sum'][.='70.0']"
, "//lst[@name='false']/long[@name='count'][.='2']"
, "//lst[@name='false']/long[@name='missing'][.='0']"
, "//lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, "//lst[@name='false']/double[@name='mean'][.='35.0']"
, "//lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
, pre+"/lst[@name='false']/double[@name='min'][.='30.0']"
, pre+"/lst[@name='false']/double[@name='max'][.='40.0']"
, pre+"/lst[@name='false']/double[@name='sum'][.='70.0']"
, pre+"/lst[@name='false']/long[@name='count'][.='2']"
, pre+"/lst[@name='false']/long[@name='missing'][.='0']"
, pre+"/lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, pre+"/lst[@name='false']/double[@name='mean'][.='35.0']"
, pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
);
}

@@ -0,0 +1,237 @@
package org.apache.solr.util;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.util.SimplePostTool.PageFetcher;
import org.apache.solr.util.SimplePostTool.PageFetcherResult;
import org.junit.Before;
import org.junit.Test;

public class SimplePostToolTest extends SolrTestCaseJ4 {
SimplePostTool t_file, t_file_auto, t_file_rec, t_web, t_test;
PageFetcher pf;

@Before
public void setUp() throws Exception {
super.setUp();
String[] args = {"-"};
System.setProperty("data", "files");
t_file = SimplePostTool.parseArgsAndInit(args);

System.setProperty("auto", "yes");
t_file_auto = SimplePostTool.parseArgsAndInit(args);

System.setProperty("recursive", "yes");
t_file_rec = SimplePostTool.parseArgsAndInit(args);

System.setProperty("data", "web");
t_web = SimplePostTool.parseArgsAndInit(args);

System.setProperty("params", "param1=foo&param2=bar");
t_test = SimplePostTool.parseArgsAndInit(args);

pf = new MockPageFetcher();
SimplePostTool.pageFetcher = pf;
SimplePostTool.mockMode = true;
}

@Test
public void testParseArgsAndInit() {
assertEquals(false, t_file.auto);
assertEquals(true, t_file_auto.auto);
assertEquals(0, t_file_auto.recursive);
assertEquals(999, t_file_rec.recursive);
assertEquals(true, t_file.commit);
assertEquals(false, t_file.optimize);
assertEquals(null, t_file.out);

assertEquals(1, t_web.recursive);
assertEquals(10, t_web.delay);

assertNotNull(t_test.solrUrl);
}

@Test
public void testNormalizeUrlEnding() {
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/"));
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/#foo?bar=baz"));
assertEquals("http://example.com/index.html", SimplePostTool.normalizeUrlEnding("http://example.com/index.html#hello"));
}

@Test
public void testComputeFullUrl() throws MalformedURLException {
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/"), "/index.html"));
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/foo/bar/"), "/index.html"));
assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo.htm?baz#hello"), "fil.html"));
// TODO: How to know what is the base if URL path ends with "foo"??
// assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo?baz#hello"), "fil.html"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "fil.jpg"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "mailto:hello@foo.bar"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "ftp://server/file"));
}

@Test
public void testTypeSupported() {
assertTrue(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("text/xml"));
assertFalse(t_web.typeSupported("text/foo"));

t_web.fileTypes = "doc,xls,ppt";
t_web.globFileFilter = t_web.getFileFilterFromFileTypes(t_web.fileTypes);
assertFalse(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("application/msword"));
}

@Test
public void testIsOn() {
assertTrue(SimplePostTool.isOn("true"));
assertTrue(SimplePostTool.isOn("1"));
assertFalse(SimplePostTool.isOn("off"));
}

@Test
public void testAppendParam() {
assertEquals("http://example.com?foo=bar", SimplePostTool.appendParam("http://example.com", "foo=bar"));
assertEquals("http://example.com/?a=b&foo=bar", SimplePostTool.appendParam("http://example.com/?a=b", "foo=bar"));
}

@Test
public void testAppendUrlPath() throws MalformedURLException {
assertEquals(new URL("http://example.com/a?foo=bar"), SimplePostTool.appendUrlPath(new URL("http://example.com?foo=bar"), "/a"));
}

@Test
public void testGuessType() {
File f = new File("foo.doc");
assertEquals("application/msword", SimplePostTool.guessType(f));
f = new File("foobar");
assertEquals(null, SimplePostTool.guessType(f));
}

@Test
public void testDoFilesMode() {
t_file_auto.recursive = 0;
File dir = getFile("exampledocs");
int num = t_file_auto.postFiles(new File[] {dir}, 0, null, null);
assertEquals(2, num);
}

@Test
public void testDoWebMode() {
// Uses mock pageFetcher
t_web.delay = 0;
t_web.recursive = 5;
int num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(5, num);

t_web.recursive = 1;
num = t_web.postWebPages(new String[] {"http://example.com/"}, 0, null);
assertEquals(3, num);

// Without respecting robots.txt
SimplePostTool.pageFetcher.robotsCache.clear();
t_web.recursive = 5;
num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(6, num);
}

@Test
public void testRobotsExclusion() throws MalformedURLException {
assertFalse(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/")));
assertTrue(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/disallowed")));
assertTrue("There should be two entries parsed from robots.txt", SimplePostTool.pageFetcher.robotsCache.get("example.com").size() == 2);
}

class MockPageFetcher extends PageFetcher {
HashMap<String,String> htmlMap = new HashMap<String,String>();
HashMap<String,Set<URL>> linkMap = new HashMap<String,Set<URL>>();

public MockPageFetcher() throws IOException {
(new SimplePostTool()).super();
htmlMap.put("http://example.com", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/index.html", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/page1", "<html><body><a href=\"http://example.com/page1/foo\"></body></html>");
htmlMap.put("http://example.com/page1/foo", "<html><body><a href=\"http://example.com/page1/foo/bar\"></body></html>");
htmlMap.put("http://example.com/page1/foo/bar", "<html><body><a href=\"http://example.com/page1\"></body></html>");
htmlMap.put("http://example.com/page2", "<html><body><a href=\"http://example.com/\"><a href=\"http://example.com/disallowed\"/></body></html>");
htmlMap.put("http://example.com/disallowed", "<html><body><a href=\"http://example.com/\"></body></html>");

Set<URL> s = new HashSet<URL>();
s.add(new URL("http://example.com/page1"));
s.add(new URL("http://example.com/page2"));
linkMap.put("http://example.com", s);
linkMap.put("http://example.com/index.html", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo"));
linkMap.put("http://example.com/page1", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo/bar"));
linkMap.put("http://example.com/page1/foo", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/disallowed"));
linkMap.put("http://example.com/page2", s);

// Simulate a robots.txt file with comments and a few disallows
StringBuilder sb = new StringBuilder();
sb.append("# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n");
sb.append("User-agent: * # match all bots\n");
sb.append("Disallow: # This is void\n");
sb.append("Disallow: /disallow # Disallow this path\n");
sb.append("Disallow: /nonexistingpath # Disallow this path\n");
this.robotsCache.put("example.com", SimplePostTool.pageFetcher.
parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes("UTF-8"))));
}

@Override
public PageFetcherResult readPageFromUrl(URL u) {
PageFetcherResult res = (new SimplePostTool()).new PageFetcherResult();
if (isDisallowedByRobots(u)) {
res.httpStatus = 403;
return res;
}
res.httpStatus = 200;
res.contentType = "text/html";
try {
res.content = htmlMap.get(u.toString()).getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException();
}
return res;
}

@Override
public Set<URL> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) {
Set<URL> s = linkMap.get(SimplePostTool.normalizeUrlEnding(u.toString()));
if(s == null)
s = new HashSet<URL>();
return s;
}
}
}