Upgrade to Lucene 3.2, closes #997.
This commit is contained in:
parent c872be75ae
commit 6788c6c375
.idea/libraries/lucene.xml (generated): 20 changed lines
@@ -1,19 +1,19 @@
 <component name="libraryTable">
   <library name="lucene">
     <CLASSES>
-      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-highlighter/jars/lucene-highlighter-3.1.0.jar!/" />
-      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-memory/jars/lucene-memory-3.1.0.jar!/" />
-      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-core/jars/lucene-core-3.1.0.jar!/" />
-      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-analyzers/jars/lucene-analyzers-3.1.0.jar!/" />
-      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-queries/jars/lucene-queries-3.1.0.jar!/" />
+      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-highlighter/jars/lucene-highlighter-3.2.0.jar!/" />
+      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-memory/jars/lucene-memory-3.2.0.jar!/" />
+      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-core/jars/lucene-core-3.2.0.jar!/" />
+      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-analyzers/jars/lucene-analyzers-3.2.0.jar!/" />
+      <root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-queries/jars/lucene-queries-3.2.0.jar!/" />
     </CLASSES>
     <JAVADOC />
     <SOURCES>
-      <root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/memory/src/java" />
-      <root url="file://$USER_HOME$/opt/lucene/3.1.0.src/src/java" />
-      <root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/highlighter/src/java" />
-      <root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/queries/src/java" />
-      <root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/analyzers/common/src/java" />
+      <root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/memory/src/java" />
+      <root url="file://$USER_HOME$/opt/lucene/3.2.0.src/src/java" />
+      <root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/highlighter/src/java" />
+      <root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/queries/src/java" />
+      <root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/analyzers/common/src/java" />
     </SOURCES>
   </library>
 </component>
@@ -37,11 +37,11 @@ dependencies {
 
     compile('net.java.dev.jna:jna:3.2.7') { transitive = false }
 
-    compile('org.apache.lucene:lucene-core:3.1.0') { transitive = false }
-    compile('org.apache.lucene:lucene-analyzers:3.1.0') { transitive = false }
-    compile('org.apache.lucene:lucene-queries:3.1.0') { transitive = false }
-    compile('org.apache.lucene:lucene-memory:3.1.0') { transitive = false }
-    compile('org.apache.lucene:lucene-highlighter:3.1.0') { transitive = false }
+    compile('org.apache.lucene:lucene-core:3.2.0') { transitive = false }
+    compile('org.apache.lucene:lucene-analyzers:3.2.0') { transitive = false }
+    compile('org.apache.lucene:lucene-queries:3.2.0') { transitive = false }
+    compile('org.apache.lucene:lucene-memory:3.2.0') { transitive = false }
+    compile('org.apache.lucene:lucene-highlighter:3.2.0') { transitive = false }
 }
 
 configurations {
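
The Lucene version is pinned in two places, the generated IDE library definition above and this Gradle dependencies block, so the bump appears in both. As a quick sanity check after such an upgrade, a hedged, illustrative Java snippet (not part of this commit; the class name is made up) can print which lucene-core jar actually ends up on the classpath:

// Hedged sketch, not part of this commit: read the implementation version from the
// lucene-core jar manifest to confirm the resolved dependency is the 3.2.0 build.
public class LuceneVersionCheck {
    public static void main(String[] args) {
        Package lucene = org.apache.lucene.LucenePackage.get();
        System.out.println("lucene-core: " + lucene.getImplementationVersion());
    }
}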
@@ -19,7 +19,6 @@
 
 package org.elasticsearch.common.bloom;
 
-import org.apache.lucene.util.OpenBitSet;
 import org.elasticsearch.common.UUID;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.ESLoggerFactory;
@@ -66,9 +65,8 @@ public class BloomFilterFactory {
         return new ObsBloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
     }
 
-    private static OpenBitSet bucketsFor(long numElements, int bucketsPer) {
-        long numBits = numElements * bucketsPer + EXCESS; //TODO overflow?
-        return new OpenBitSet((long) Math.min(Long.MAX_VALUE, numBits));
+    private static long bucketsFor(long numElements, int bucketsPer) {
+        return numElements * bucketsPer + EXCESS;
     }
 
     public static void main(String[] args) throws UnsupportedEncodingException {
@@ -29,10 +29,12 @@ public class ObsBloomFilter implements BloomFilter {
     private final int hashCount;
 
     private final OpenBitSet bitset;
+    private final long size;
 
-    ObsBloomFilter(int hashCount, OpenBitSet bs) {
+    ObsBloomFilter(int hashCount, long size) {
         this.hashCount = hashCount;
-        this.bitset = bs;
+        this.bitset = new OpenBitSet(size);
+        this.size = size;
     }
 
     long emptyBuckets() {
@@ -46,7 +48,7 @@ public class ObsBloomFilter implements BloomFilter {
     }
 
     private long buckets() {
-        return bitset.size();
+        return size;
     }
 
     private long[] getHashBuckets(ByteBuffer key) {
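
The two bloom-filter hunks above are one refactor: bucketsFor now returns a plain bucket count instead of a pre-built OpenBitSet (which is why the OpenBitSet import disappears from BloomFilterFactory), and ObsBloomFilter allocates its own bitset from that count and reports its size from the stored value. A hedged, self-contained sketch of the resulting pattern; the class name and the EXCESS value are illustrative, not the project's code:

import org.apache.lucene.util.OpenBitSet;

// Hedged sketch, not the commit's code: a stripped-down filter showing the new
// construction pattern. The caller passes a bucket count (a long), and the filter
// allocates its own OpenBitSet; EXCESS stands in for the constant in BloomFilterFactory.
public class BloomBucketsSketch {
    static final long EXCESS = 20; // assumed slack constant, mirrors BloomFilterFactory.EXCESS

    private final int hashCount;
    private final OpenBitSet bitset;
    private final long size;

    BloomBucketsSketch(int hashCount, long size) {
        this.hashCount = hashCount;
        this.bitset = new OpenBitSet(size); // allocation now happens inside the filter
        this.size = size;
    }

    long buckets() {
        return size; // reported from the stored size rather than bitset.size()
    }

    static long bucketsFor(long numElements, int bucketsPer) {
        return numElements * bucketsPer + EXCESS; // a count, no longer an OpenBitSet
    }

    public static void main(String[] args) {
        BloomBucketsSketch filter = new BloomBucketsSketch(4, bucketsFor(1000000, 10));
        System.out.println(filter.hashCount + " hashes, " + filter.buckets()
                + " buckets, bitset capacity " + filter.bitset.capacity());
    }
}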
@@ -36,6 +36,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
 
     private final char delimiter;
     private final char replacement;
+    private final int skip;
 
     @Inject public PathHierarchyTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
@@ -57,9 +58,10 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
         } else {
            this.replacement = replacement.charAt(0);
         }
+        this.skip = settings.getAsInt("skip", PathHierarchyTokenizer.DEFAULT_SKIP);
     }
 
     @Override public Tokenizer create(Reader reader) {
-        return new PathHierarchyTokenizer(reader, bufferSize, delimiter, replacement);
+        return new PathHierarchyTokenizer(reader, bufferSize, delimiter, replacement, skip);
     }
 }
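
The factory change above exposes, as a "skip" setting, the skip argument that PathHierarchyTokenizer accepts in Lucene 3.2 (the number of leading path components to drop). A hedged usage sketch of the tokenizer itself, matching the five-argument constructor called in create(); the sample path, buffer size, and expected tokens are illustrative assumptions, not taken from the commit:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hedged sketch, not the commit's code: what the new skip argument changes at the
// tokenizer level, using the same constructor shape as the factory's create() above.
public class PathHierarchySkipSketch {
    public static void main(String[] args) throws IOException {
        PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(
                new StringReader("/usr/local/lib"), 1024, '/', '/', 1 /* skip */);
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        while (tokenizer.incrementToken()) {
            // With skip = 1 the leading "/usr" component is dropped, so the expected
            // tokens are "/local" and "/local/lib" instead of "/usr", "/usr/local", ...
            System.out.println(term.toString());
        }
        tokenizer.close();
    }
}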
@@ -19,8 +19,17 @@
 
 package org.elasticsearch.deps.lucene;
 
-import org.apache.lucene.document.*;
-import org.apache.lucene.index.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
@@ -254,87 +263,6 @@ public class SimpleLuceneTests {
         termDocs.next();
     }
 
-    /**
-     * Verify doc freqs update with refresh of readers.
-     */
-    @Test public void testTermEnumDocFreq() throws Exception {
-        Directory dir = new RAMDirectory();
-        IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
-
-        IndexReader reader = indexWriter.getReader();
-
-        Document doc = new Document();
-        doc.add(new Field("id", "1", Field.Store.NO, Field.Index.ANALYZED));
-        doc.add(new Field("value", "aaa", Field.Store.NO, Field.Index.ANALYZED));
-        indexWriter.addDocument(doc);
-
-        reader = refreshReader(reader);
-
-        TermEnum termEnum = reader.terms(new Term("value", ""));
-        assertThat(termEnum.term().text(), equalTo("aaa"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.close();
-
-        doc = new Document();
-        doc.add(new Field("id", "2", Field.Store.NO, Field.Index.ANALYZED));
-        doc.add(new Field("value", "bbb bbb", Field.Store.NO, Field.Index.ANALYZED));
-        indexWriter.addDocument(doc);
-
-        reader = refreshReader(reader);
-
-        termEnum = reader.terms(new Term("value", ""));
-        assertThat(termEnum.term().text(), equalTo("aaa"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.next();
-        assertThat(termEnum.term().text(), equalTo("bbb"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.close();
-
-        doc = new Document();
-        doc.add(new Field("id", "3", Field.Store.NO, Field.Index.ANALYZED));
-        doc.add(new Field("value", "bbb", Field.Store.NO, Field.Index.ANALYZED));
-        indexWriter.addDocument(doc);
-
-        reader = refreshReader(reader);
-
-        termEnum = reader.terms(new Term("value", ""));
-        assertThat(termEnum.term().text(), equalTo("aaa"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.next();
-        assertThat(termEnum.term().text(), equalTo("bbb"));
-        assertThat(termEnum.docFreq(), equalTo(2));
-        termEnum.close();
-
-        indexWriter.deleteDocuments(new Term("id", "3"));
-
-        reader = refreshReader(reader);
-
-        // won't see the changes until optimize
-        termEnum = reader.terms(new Term("value", ""));
-        assertThat(termEnum.term().text(), equalTo("aaa"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.next();
-        assertThat(termEnum.term().text(), equalTo("bbb"));
-        assertThat(termEnum.docFreq(), equalTo(2));
-        termEnum.close();
-
-        indexWriter.expungeDeletes();
-
-        reader = refreshReader(reader);
-
-        termEnum = reader.terms(new Term("value", ""));
-        assertThat(termEnum.term().text(), equalTo("aaa"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.next();
-        assertThat(termEnum.term().text(), equalTo("bbb"));
-        assertThat(termEnum.docFreq(), equalTo(1));
-        termEnum.close();
-
-
-        reader.close();
-        indexWriter.close();
-    }
-
     /**
      * A test just to verify that term freqs are not stored for numeric fields. <tt>int1</tt> is not storing termFreq
      * and <tt>int2</tt> does.