commit 33f6da286e
merge trunk (1364720-1364799)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1364800 13f79535-47bb-0310-9956-ffa450edef68
@@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
       //   w.close();
       // }
     } else {
-      assert sumTotalTermFreq == 0;
+      assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
       assert sumDocFreq == 0;
       assert docCount == 0;
     }
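
The relaxed assert above encodes the convention this commit threads through the whole postings-writing chain: for fields indexed with IndexOptions.DOCS_ONLY, term-frequency statistics are now reported as the sentinel -1 instead of 0. A condensed sketch of the invariant (hypothetical code, not part of the commit):

    // Sketch: a field that omits term frequencies (DOCS_ONLY) reports its
    // aggregate totalTermFreq as -1; any other field reports a real total.
    assert fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY || sumTotalTermFreq == -1;
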
@@ -49,14 +49,17 @@ import org.apache.lucene.util.FixedBitSet;
  */
 public abstract class PostingsConsumer {
 
-  /** Adds a new doc in this term. */
+  /** Adds a new doc in this term.
+   * <code>freq</code> will be -1 when term frequencies are omitted
+   * for the field. */
   public abstract void startDoc(int docID, int freq) throws IOException;
 
   /** Add a new position & payload, and start/end offset.  A
    * null payload means no payload; a non-null payload with
    * zero length also means no payload.  Caller may reuse
    * the {@link BytesRef} for the payload between calls
-   * (method must fully consume the payload). */
+   * (method must fully consume the payload). <code>startOffset</code>
+   * and <code>endOffset</code> will be -1 when offsets are not indexed. */
   public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
 
   /** Called when we are done adding positions & payloads
@@ -78,7 +81,7 @@ public abstract class PostingsConsumer {
         break;
       }
       visitedDocs.set(doc);
-      this.startDoc(doc, 0);
+      this.startDoc(doc, -1);
       this.finishDoc();
       df++;
     }
@@ -146,6 +149,6 @@ public abstract class PostingsConsumer {
         df++;
       }
     }
-    return new TermStats(df, totTF);
+    return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF);
   }
 }
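
To make the updated startDoc contract concrete, here is a minimal hypothetical PostingsConsumer (not part of the commit) that honors the new -1 sentinels; it implements only the three abstract methods shown in this file:

    import java.io.IOException;
    import org.apache.lucene.codecs.PostingsConsumer;
    import org.apache.lucene.util.BytesRef;

    // Counts postings while respecting the new contract: freq == -1 means the
    // field omits term frequencies (DOCS_ONLY), so each doc counts as a single
    // posting and no positions will ever be fed.
    class CountingPostingsConsumer extends PostingsConsumer {
      long postingCount;

      @Override
      public void startDoc(int docID, int freq) throws IOException {
        postingCount += (freq == -1) ? 1 : freq;
      }

      @Override
      public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
        // startOffset/endOffset are -1 when offsets are not indexed, per the
        // updated javadoc above; a real consumer must not treat -1 as a value.
      }

      @Override
      public void finishDoc() throws IOException {}
    }
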
@@ -57,10 +57,14 @@ public abstract class TermsConsumer {
    *  no docs. */
   public abstract PostingsConsumer startTerm(BytesRef text) throws IOException;
 
-  /** Finishes the current term; numDocs must be > 0. */
+  /** Finishes the current term; numDocs must be > 0.
+   * <code>stats.totalTermFreq</code> will be -1 when term
+   * frequencies are omitted for the field. */
   public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
 
-  /** Called when we are done adding terms to this field */
+  /** Called when we are done adding terms to this field.
+   * <code>sumTotalTermFreq</code> will be -1 when term
+   * frequencies are omitted for the field. */
   public abstract void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException;
 
   /** Return the BytesRef Comparator used to sort terms
@@ -205,6 +209,6 @@ public abstract class TermsConsumer {
         }
       }
     }
-    finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+    finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
   }
 }
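
A caller feeding a TermsConsumer must apply the same sentinel when it builds TermStats and the field totals. A hypothetical helper illustrating the pattern (the FreqProxTermsWriterPerField hunks below do the equivalent inline; assumes the imports already used in this file plus TermStats and FieldInfo.IndexOptions):

    // hasFreqs is derived from the field's IndexOptions; when frequencies are
    // omitted, both the per-term and per-field totals are reported as -1.
    void finishField(TermsConsumer tc, IndexOptions opts, BytesRef lastTerm,
                     int docFreq, long totalTermFreq,
                     long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
      boolean hasFreqs = opts.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      tc.finishTerm(lastTerm, new TermStats(docFreq, hasFreqs ? totalTermFreq : -1));
      tc.finish(hasFreqs ? sumTotalTermFreq : -1, sumDocFreq, docCount);
    }
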
@@ -430,7 +430,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         if (readTermFreq) {
           termDocFreq = postings.docFreqs[termID];
         } else {
-          termDocFreq = 0;
+          termDocFreq = -1;
         }
         postings.lastDocCodes[termID] = -1;
       } else {
@@ -441,7 +441,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
           final int code = freq.readVInt();
           if (!readTermFreq) {
             docID += code;
-            termDocFreq = 0;
+            termDocFreq = -1;
           } else {
             docID += code >>> 1;
             if ((code & 1) != 0) {
@@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         //    2nd sweep does the real flush, but I suspect
         //    that'd add too much time to flush.
         visitedDocs.set(docID);
-        postingsConsumer.startDoc(docID, termDocFreq);
+        postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
         if (docID < delDocLimit) {
           // Mark it deleted.  TODO: we could also skip
           // writing its postings; this would be
@@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         }
         postingsConsumer.finishDoc();
       }
-      termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+      termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
       sumTotalTermFreq += totTF;
       sumDocFreq += numDocs;
     }
 
-    termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+    termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
   }
 }
@@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase {
         sumDF += term.docs.length;
         sumTotalTermCount += term.write(termsConsumer);
       }
-      termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
+      termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
     }
   }
 
@@ -154,7 +154,7 @@ public class TestCodecs extends LuceneTestCase {
       for(int i=0;i<docs.length;i++) {
         final int termDocFreq;
         if (field.omitTF) {
-          termDocFreq = 0;
+          termDocFreq = -1;
         } else {
           termDocFreq = positions[i].length;
         }
@@ -165,10 +165,10 @@ public class TestCodecs extends LuceneTestCase {
             final PositionData pos = positions[i][j];
             postingsConsumer.addPosition(pos.pos, pos.payload, -1, -1);
           }
         }
         postingsConsumer.finishDoc();
       }
-      termsConsumer.finishTerm(text, new TermStats(docs.length, totTF));
+      termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF));
       return totTF;
     }
   }
@@ -406,7 +406,7 @@ public class TestPostingsFormat extends LuceneTestCase {
         if (VERBOSE) {
           System.out.println("      " + docCount + ": docID=" + posting.docID + " freq=" + posting.positions.size());
         }
-        postingsConsumer.startDoc(posting.docID, posting.positions.size());
+        postingsConsumer.startDoc(posting.docID, doFreq ? posting.positions.size() : -1);
         seenDocs.set(posting.docID);
         if (doPos) {
           totalTF += posting.positions.size();
@@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase {
         postingsConsumer.finishDoc();
         docCount++;
       }
-      termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF));
+      termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1));
       sumTotalTF += totalTF;
       sumDF += postings.size();
     }
 
-    termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality());
+    termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
   }
 
   fieldsConsumer.close();
@@ -28,10 +28,26 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.spatial.query.SpatialArgs;
 
 /**
- * The SpatialStrategy encapsulates an approach to indexing and searching based on shapes.
+ * The SpatialStrategy encapsulates an approach to indexing and searching based
+ * on shapes.
  * <p/>
- * Note that a SpatialStrategy is not involved with the Lucene stored field values of shapes, which is
- * immaterial to indexing & search.
+ * Different implementations will support different features. A strategy should
+ * document these common elements:
+ * <ul>
+ *   <li>Can it index more than one shape per field?</li>
+ *   <li>What types of shapes can be indexed?</li>
+ *   <li>What types of query shapes can be used?</li>
+ *   <li>What types of query operations are supported?
+ *   This might vary per shape.</li>
+ *   <li>Are there caches?  Under what circumstances are they used?
+ *   Roughly how big are they? Is it segmented by Lucene segments, such as is
+ *   done by the Lucene {@link org.apache.lucene.search.FieldCache} and
+ *   {@link org.apache.lucene.index.DocValues} (ideal) or is it for the entire
+ *   index?</li>
+ * </ul>
+ * <p/>
+ * Note that a SpatialStrategy is not involved with the Lucene stored field
+ * values of shapes, which is immaterial to indexing & search.
  * <p/>
 * Thread-safe.
 *
@@ -16,8 +16,49 @@
 -->
 <html>
 <head>
-  <title>Apache Lucene Spatial Strategies</title>
+  <title>Apache Lucene Spatial Module</title>
 </head>
 <body>
 
+  <h1>The Spatial Module for Apache Lucene</h1>
+
+  <p>
+    The spatial module is new to Lucene 4, replacing the old contrib module
+    that came before it. The principal interface to the module is
+    a {@link org.apache.lucene.spatial.SpatialStrategy}
+    which encapsulates an approach to indexing and searching
+    based on shapes. Different Strategies have different features and
+    performance profiles, which are documented at each Strategy class level.
+  </p>
+  <p>
+    For some sample code showing how to use the API, see SpatialExample.java in
+    the tests.
+  </p>
+  <p>
+    The spatial module uses
+    <a href="https://github.com/spatial4j/spatial4j">Spatial4j</a>
+    heavily. Spatial4j is an ASL licensed library with these capabilities:
+    <ul>
+      <li>Provides shape implementations, namely point, rectangle,
+      and circle. Both geospatial contexts and plain 2D Euclidean/Cartesian contexts
+      are supported.
+      With an additional dependency, it adds polygon and other geometry shape
+      support via integration with
+      <a href="http://sourceforge.net/projects/jts-topo-suite/">JTS Topology Suite</a>.
+      This includes dateline wrap support.</li>
+      <li>Shape parsing and serialization, including
+      <a href="http://en.wikipedia.org/wiki/Well-known_text">Well-Known Text (WKT)</a>
+      (via JTS).</li>
+      <li>Distance and other spatial related math calculations.</li>
+    </ul>
+  </p>
+  <p>
+    Historical note: The new spatial module was once known as
+    Lucene Spatial Playground (LSP) as an external project. In ~March 2012, LSP
+    split into this new module as part of Lucene and Spatial4j externally. A
+    large chunk of the LSP implementation originated as SOLR-2155 which uses
+    trie/prefix-tree algorithms with a geohash encoding.
+  </p>
+
 </body>
 </html>
@@ -0,0 +1,180 @@
+package org.apache.lucene.spatial;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.simple.SimpleSpatialContext;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialArgsParser;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+
+/**
+ * This class serves as example code to show how to use the Lucene spatial
+ * module.
+ */
+public class SpatialExample extends LuceneTestCase {
+
+  public static void main(String[] args) throws IOException {
+    new SpatialExample().test();
+  }
+
+  public void test() throws IOException {
+    init();
+    indexPoints();
+    search();
+  }
+
+  /**
+   * The Spatial4j {@link SpatialContext} is a sort of global-ish singleton
+   * needed by Lucene spatial.  It's a facade to the rest of Spatial4j, acting
+   * as a factory for {@link Shape}s and provides access to reading and writing
+   * them from Strings.
+   */
+  private SpatialContext ctx;//"ctx" is the conventional variable name
+
+  /**
+   * The Lucene spatial {@link SpatialStrategy} encapsulates an approach to
+   * indexing and searching shapes, and providing relevancy scores for them.
+   * It's a simple API to unify different approaches.
+   * <p />
+   * Note that these are initialized with a field name.
+   */
+  private SpatialStrategy strategy;
+
+  private Directory directory;
+
+  protected void init() {
+    //Typical geospatial context with kilometer units.
+    //  These can also be constructed from a factory: SpatialContextFactory
+    this.ctx = SimpleSpatialContext.GEO_KM;
+
+    int maxLevels = 10;//results in sub-meter precision for geohash
+    //TODO demo lookup by detail distance
+    //  This can also be constructed from a factory: SpatialPrefixTreeFactory
+    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);
+
+    this.strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
+
+    this.directory = new RAMDirectory();
+  }
+
+  private void indexPoints() throws IOException {
+    IndexWriterConfig iwConfig = new IndexWriterConfig(TEST_VERSION_CURRENT,null);
+    IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
+
+    //Spatial4j is x-y order for arguments
+    indexWriter.addDocument(newSampleDocument(
+        2, ctx.makePoint(-80.93, 33.77)));
+
+    //When parsing a string to a shape, the presence of a comma means it's y-x
+    //  order (lon, lat)
+    indexWriter.addDocument(newSampleDocument(
+        4, ctx.readShape("-50.7693246, 60.9289094")));
+
+    indexWriter.addDocument(newSampleDocument(
+        20, ctx.makePoint(0.1,0.1), ctx.makePoint(0, 0)));
+
+    indexWriter.close();
+  }
+
+  private Document newSampleDocument(int id, Shape... shapes) {
+    Document doc = new Document();
+    doc.add(new IntField("id", id, Field.Store.YES));
+    //Potentially more than one shape in this field is supported by some
+    //  strategies; see the javadocs of the SpatialStrategy impl to see.
+    for (Shape shape : shapes) {
+      for (IndexableField f : strategy.createIndexableFields(shape)) {
+        doc.add(f);
+      }
+      //store it too; the format is up to you
+      doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
+    }
+
+    return doc;
+  }
+
+  private void search() throws IOException {
+    IndexReader indexReader = DirectoryReader.open(directory);
+    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+    Sort idSort = new Sort(new SortField("id", SortField.Type.INT));
+
+    //--Filter by circle (<= distance from a point)
+    {
+      //Search with circle
+      //note: SpatialArgs can be parsed from a string
+      SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
+          ctx.makeCircle(-80.0, 33.0, 200));//200km (since km == ctx.getDistanceUnits
+      Filter filter = strategy.makeFilter(args);
+      TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), filter, 10, idSort);
+      assertDocMatchedIds(indexSearcher, docs, 2);
+    }
+    //--Match all, order by distance
+    {
+      SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,//doesn't matter
+          ctx.makePoint(60, -50));
+      ValueSource valueSource = strategy.makeValueSource(args);//the distance
+      Sort reverseDistSort = new Sort(valueSource.getSortField(false)).rewrite(indexSearcher);//true=asc dist
+      TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), 10, reverseDistSort);
+      assertDocMatchedIds(indexSearcher, docs, 4, 20, 2);
+    }
+    //demo arg parsing
+    {
+      SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
+          ctx.makeCircle(-80.0, 33.0, 200));
+      SpatialArgs args2 = new SpatialArgsParser().parse("Intersects(Circle(33,-80 d=200))", ctx);
+      assertEquals(args.toString(),args2.toString());
+    }
+
+    indexReader.close();
+  }
+
+  private void assertDocMatchedIds(IndexSearcher indexSearcher, TopDocs docs, int... ids) throws IOException {
+    int[] gotIds = new int[docs.totalHits];
+    for (int i = 0; i < gotIds.length; i++) {
+      gotIds[i] = indexSearcher.doc(docs.scoreDocs[i].doc).getField("id").numericValue().intValue();
+    }
+    assertArrayEquals(ids,gotIds);
+  }
+
+}
@@ -18,16 +18,24 @@ package org.apache.lucene.codecs.asserting;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
 import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
 
 /**
  * Just like {@link Lucene40PostingsFormat} but with additional asserts.
@@ -39,10 +47,9 @@ public class AssertingPostingsFormat extends PostingsFormat {
     super("Asserting");
   }
 
-  // TODO: we could add some useful checks here?
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    return in.fieldsConsumer(state);
+    return new AssertingFieldsConsumer(in.fieldsConsumer(state));
   }
 
   @Override
@@ -85,4 +92,164 @@ public class AssertingPostingsFormat extends PostingsFormat {
       return in.getUniqueTermCount();
     }
   }
+
+  static class AssertingFieldsConsumer extends FieldsConsumer {
+    private final FieldsConsumer in;
+
+    AssertingFieldsConsumer(FieldsConsumer in) {
+      this.in = in;
+    }
+
+    @Override
+    public TermsConsumer addField(FieldInfo field) throws IOException {
+      TermsConsumer consumer = in.addField(field);
+      assert consumer != null;
+      return new AssertingTermsConsumer(consumer, field);
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+  }
+
+  static enum TermsConsumerState { INITIAL, START, FINISHED };
+  static class AssertingTermsConsumer extends TermsConsumer {
+    private final TermsConsumer in;
+    private final FieldInfo fieldInfo;
+    private BytesRef lastTerm = null;
+    private TermsConsumerState state = TermsConsumerState.INITIAL;
+    private AssertingPostingsConsumer lastPostingsConsumer = null;
+    private long sumTotalTermFreq = 0;
+    private long sumDocFreq = 0;
+    private OpenBitSet visitedDocs = new OpenBitSet();
+
+    AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
+      this.in = in;
+      this.fieldInfo = fieldInfo;
+    }
+
+    @Override
+    public PostingsConsumer startTerm(BytesRef text) throws IOException {
+      assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
+      state = TermsConsumerState.START;
+      assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0;
+      lastTerm = BytesRef.deepCopyOf(text);
+      return lastPostingsConsumer = new AssertingPostingsConsumer(in.startTerm(text), fieldInfo, visitedDocs);
+    }
+
+    @Override
+    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
+      assert state == TermsConsumerState.START;
+      state = TermsConsumerState.INITIAL;
+      assert text.equals(lastTerm);
+      assert stats.docFreq > 0; // otherwise, this method should not be called.
+      assert stats.docFreq == lastPostingsConsumer.docFreq;
+      sumDocFreq += stats.docFreq;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+        assert stats.totalTermFreq == -1;
+      } else {
+        assert stats.totalTermFreq == lastPostingsConsumer.totalTermFreq;
+        sumTotalTermFreq += stats.totalTermFreq;
+      }
+      in.finishTerm(text, stats);
+    }
+
+    @Override
+    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
+      assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
+      state = TermsConsumerState.FINISHED;
+      assert docCount >= 0;
+      assert docCount == visitedDocs.cardinality();
+      assert sumDocFreq >= docCount;
+      assert sumDocFreq == this.sumDocFreq;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+        assert sumTotalTermFreq == -1;
+      } else {
+        assert sumTotalTermFreq >= sumDocFreq;
+        assert sumTotalTermFreq == this.sumTotalTermFreq;
+      }
+      in.finish(sumTotalTermFreq, sumDocFreq, docCount);
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() throws IOException {
+      return in.getComparator();
+    }
+  }
+
+  static enum PostingsConsumerState { INITIAL, START };
+  static class AssertingPostingsConsumer extends PostingsConsumer {
+    private final PostingsConsumer in;
+    private final FieldInfo fieldInfo;
+    private final OpenBitSet visitedDocs;
+    private PostingsConsumerState state = PostingsConsumerState.INITIAL;
+    private int freq;
+    private int positionCount;
+    private int lastPosition = 0;
+    private int lastStartOffset = 0;
+    int docFreq = 0;
+    long totalTermFreq = 0;
+
+    AssertingPostingsConsumer(PostingsConsumer in, FieldInfo fieldInfo, OpenBitSet visitedDocs) {
+      this.in = in;
+      this.fieldInfo = fieldInfo;
+      this.visitedDocs = visitedDocs;
+    }
+
+    @Override
+    public void startDoc(int docID, int freq) throws IOException {
+      assert state == PostingsConsumerState.INITIAL;
+      state = PostingsConsumerState.START;
+      assert docID >= 0;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+        assert freq == -1;
+        this.freq = 0; // we don't expect any positions here
+      } else {
+        assert freq > 0;
+        this.freq = freq;
+        totalTermFreq += freq;
+      }
+      this.positionCount = 0;
+      this.lastPosition = 0;
+      this.lastStartOffset = 0;
+      docFreq++;
+      visitedDocs.set(docID);
+      in.startDoc(docID, freq);
+    }
+
+    @Override
+    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+      assert state == PostingsConsumerState.START;
+      assert positionCount < freq;
+      positionCount++;
+      assert position >= lastPosition || position == -1; /* we still allow -1 from old 3.x indexes */
+      lastPosition = position;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+        assert startOffset >= 0;
+        assert startOffset >= lastStartOffset;
+        lastStartOffset = startOffset;
+        assert endOffset >= startOffset;
+      } else {
+        assert startOffset == -1;
+        assert endOffset == -1;
+      }
+      if (payload != null) {
+        assert fieldInfo.hasPayloads();
+      }
+      in.addPosition(position, payload, startOffset, endOffset);
+    }
+
+    @Override
+    public void finishDoc() throws IOException {
+      assert state == PostingsConsumerState.START;
+      state = PostingsConsumerState.INITIAL;
+      if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+        assert positionCount == 0; // we should not have fed any positions!
+      } else {
+        assert positionCount == freq;
+      }
+      in.finishDoc();
+    }
+  }
 }
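
The asserting wrapper only does work once it is routed into an index. A hypothetical test wiring (a sketch, not from the commit: it assumes the format is registered with SPI under the name "Asserting" passed to super() above, and that Lucene 4.0's Lucene40Codec per-field hook is overridden):

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    iwc.setCodec(new Lucene40Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return PostingsFormat.forName("Asserting"); // resolved via SPI by name
      }
    });
    IndexWriter w = new IndexWriter(dir, iwc);
    // every flush now runs the state-machine and -1-sentinel asserts above
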
@@ -131,6 +131,8 @@ Bug Fixes
 * SOLR-3663: There are a couple of bugs in the sync process when a leader goes down and a
   new leader is elected. (Mark Miller)
 
+* SOLR-3623: Fixed inconsistent treatment of third-party dependencies for
+  solr contribs analysis-extras & uima (hossman)
+
 Other Changes
 ----------------------
@@ -70,21 +70,32 @@
   -->
   <property name="solr.spec.version" value="5.0.0.${dateversion}" />
 
+  <path id="solr.lucene.libs">
+    <!-- List of jars that will be used as the foundation for both
+         the base classpath, as well as copied into the lucene-libs dir
+         in the release.
+    -->
+    <!-- NOTE: lucene-core is explicitly not included because of the
+         base.classpath (compilation & tests are done directly against
+         the class files w/o needing to build the jar)
+    -->
+    <pathelement location="${analyzers-common.jar}"/>
+    <pathelement location="${analyzers-kuromoji.jar}"/>
+    <pathelement location="${analyzers-phonetic.jar}"/>
+    <pathelement location="${highlighter.jar}"/>
+    <pathelement location="${memory.jar}"/>
+    <pathelement location="${misc.jar}"/>
+    <pathelement location="${spatial.jar}"/>
+    <pathelement location="${suggest.jar}"/>
+    <pathelement location="${grouping.jar}"/>
+    <pathelement location="${queries.jar}"/>
+    <pathelement location="${queryparser.jar}"/>
+  </path>
+
   <path id="solr.base.classpath">
-    <pathelement path="${analyzers-common.jar}"/>
-    <pathelement path="${analyzers-kuromoji.jar}"/>
-    <pathelement path="${analyzers-phonetic.jar}"/>
-    <pathelement path="${analyzers-uima.jar}"/>
-    <pathelement path="${highlighter.jar}"/>
-    <pathelement path="${memory.jar}"/>
-    <pathelement path="${misc.jar}"/>
-    <pathelement path="${spatial.jar}"/>
-    <pathelement path="${suggest.jar}"/>
-    <pathelement path="${grouping.jar}"/>
-    <pathelement path="${queries.jar}"/>
-    <pathelement path="${queryparser.jar}"/>
     <pathelement location="${common-solr.dir}/build/solr-solrj/classes/java"/>
     <pathelement location="${common-solr.dir}/build/solr-core/classes/java"/>
+    <path refid="solr.lucene.libs" />
     <path refid="additional.dependencies"/>
     <path refid="base.classpath"/>
   </path>
@@ -125,7 +136,7 @@
   </target>
 
   <target name="prep-lucene-jars"
-          depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-morfologik, jar-suggest, jar-highlighter, jar-memory,
+          depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-suggest, jar-highlighter, jar-memory,
                    jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser">
     <property name="solr.deps.compiled" value="true"/>
   </target>
@@ -137,19 +148,11 @@
       <propertyset refid="uptodate.and.compiled.properties"/>
     </ant>
     <copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
+      <path refid="solr.lucene.libs" />
+      <!-- NOTE: lucene-core is not already included in "solr.lucene.libs"
+           because of its use in classpaths.
+      -->
       <fileset file="${lucene-core.jar}" />
-      <fileset file="${analyzers-common.jar}" />
-      <fileset file="${analyzers-kuromoji.jar}" />
-      <fileset file="${analyzers-phonetic.jar}" />
-      <fileset file="${suggest.jar}" />
-      <fileset file="${grouping.jar}" />
-      <fileset file="${queries.jar}" />
-      <fileset file="${queryparser.jar}" />
-      <fileset file="${highlighter.jar}" />
-      <fileset file="${memory.jar}" />
-      <fileset file="${misc.jar}" />
-      <fileset file="${spatial.jar}" />
-      <fileset refid="analyzers-morfologik.fileset" />
     </copy>
   </sequential>
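
The build refactor above replaces two hand-maintained jar lists with a single solr.lucene.libs path that is referenced both from the compile classpath and from the copy into lucene-libs, so the two lists can no longer drift apart; per the commit's own comments, lucene-core stays out of the shared list because the base classpath compiles and tests directly against its class files.
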
@@ -9,8 +9,11 @@ Relies upon the following lucene components (in lucene-libs/):
 * lucene-analyzers-icu-X.Y.jar
 * lucene-analyzers-smartcn-X.Y.jar
 * lucene-analyzers-stempel-X.Y.jar
+* lucene-analyzers-morfologik-X.Y.jar
+* lucene-analyzers-smartcn-X.Y.jar
 
-And the ICU library (in lib/):
+And the following third-party library (in lib/):
 
 * icu4j-X.Y.jar
+* morfologik-*.jar
 
@@ -25,12 +25,16 @@
 
   <import file="../contrib-build.xml"/>
 
+  <path id="analysis.extras.lucene.libs">
+    <pathelement location="${analyzers-icu.jar}"/>
+    <pathelement location="${analyzers-smartcn.jar}"/>
+    <pathelement location="${analyzers-stempel.jar}"/>
+    <pathelement location="${analyzers-morfologik.jar}"/>
+  </path>
+
   <path id="classpath">
     <fileset dir="lib" excludes="${common.classpath.excludes}"/>
-    <pathelement path="${analyzers-icu.jar}"/>
-    <pathelement path="${analyzers-smartcn.jar}"/>
-    <pathelement path="${analyzers-stempel.jar}"/>
-    <fileset refid="analyzers-morfologik.fileset" />
+    <path refid="analysis.extras.lucene.libs" />
     <path refid="solr.base.classpath"/>
   </path>
 
@@ -38,10 +42,7 @@
           depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel, jar-analyzers-morfologik">
     <mkdir dir="${build.dir}/lucene-libs"/>
     <copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
-      <fileset file="${analyzers-icu.jar}"/>
-      <fileset file="${analyzers-smartcn.jar}"/>
-      <fileset file="${analyzers-stempel.jar}"/>
-      <fileset refid="analyzers-morfologik.fileset" />
+      <path refid="analysis.extras.lucene.libs" />
     </copy>
   </target>
 
@@ -20,6 +20,9 @@
   <info organisation="org.apache.solr" module="analysis-extras"/>
   <dependencies>
     <dependency org="com.ibm.icu" name="icu4j" rev="4.8.1.1" transitive="false"/>
+    <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.3" transitive="false"/>
+    <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.3" transitive="false"/>
+    <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.3" transitive="false"/>
     <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
   </dependencies>
 </ivy-module>
@@ -0,0 +1 @@
+d1f729cd3019e6d86485226202f84458141a5688

@@ -0,0 +1,29 @@
+
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of Morfologik nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,2 @@
+This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
+(http://morfologik.blogspot.com/).

@@ -0,0 +1 @@
+8217b6f7ad018ceda0e824b2e60340000da4397a

@@ -0,0 +1,62 @@
+BSD-licensed dictionary of Polish (Morfologik)
+
+Copyright (c) 2012, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--
+
+BSD-licensed dictionary of Polish (SGJP)
+http://sgjp.pl/morfeusz/
+
+Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
+Marcin Woliński, Robert Wołosz
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,6 @@
+
+This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+(http://morfologik.blogspot.com/)
+
+This product includes data from BSD-licensed dictionary of Polish (SGJP)
+(http://sgjp.pl/morfeusz/)

@@ -0,0 +1 @@
+c4ead57b78fa71b00553ff21da6fb5a326e914e8

@@ -0,0 +1,29 @@
+
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of Morfologik nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,2 @@
+This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
+(http://morfologik.blogspot.com/).
@@ -6,6 +6,7 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
    or set <lib/> tags in solrconfig.xml appropriately to point those jar files.
 
      <lib dir="../../contrib/uima/lib" />
+     <lib dir="../../contrib/uima/lucene-libs" />
      <lib dir="../../dist/" regex="apache-solr-uima-\d.*\.jar" />
 
 2. modify your schema.xml adding the fields you want to hold metadata, specifying proper values for type, indexed, stored and multiValued options:
@@ -191,7 +191,7 @@ public class LeaderElectionIntegrationTest extends SolrTestCaseJ4 {
     int newLeaderPort = getLeaderPort(leader);
     int retry = 0;
     while (leaderPort == newLeaderPort) {
-      if (retry++ == 20) {
+      if (retry++ == 60) {
         break;
       }
       Thread.sleep(1000);
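
Since each retry sleeps 1000 ms, raising the cap from 20 to 60 iterations extends the maximum wait for a new leader from roughly 20 seconds to 60 seconds before the test gives up.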