Upgrade to Lucene 3.2, closes #997.

This commit is contained in:
kimchy 2011-06-05 01:57:10 +03:00
parent c872be75ae
commit 6788c6c375
6 changed files with 36 additions and 106 deletions

View File

@@ -1,19 +1,19 @@
<component name="libraryTable">
<library name="lucene">
<CLASSES>
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-highlighter/jars/lucene-highlighter-3.1.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-memory/jars/lucene-memory-3.1.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-core/jars/lucene-core-3.1.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-analyzers/jars/lucene-analyzers-3.1.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-queries/jars/lucene-queries-3.1.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-highlighter/jars/lucene-highlighter-3.2.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-memory/jars/lucene-memory-3.2.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-core/jars/lucene-core-3.2.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-analyzers/jars/lucene-analyzers-3.2.0.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-queries/jars/lucene-queries-3.2.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/memory/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.1.0.src/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/highlighter/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/queries/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.1.0.src/contrib/analyzers/common/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/memory/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.2.0.src/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/highlighter/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/queries/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.2.0.src/contrib/analyzers/common/src/java" />
</SOURCES>
</library>
</component>

View File

@@ -37,11 +37,11 @@ dependencies {
compile('net.java.dev.jna:jna:3.2.7') { transitive = false }
compile('org.apache.lucene:lucene-core:3.1.0') { transitive = false }
compile('org.apache.lucene:lucene-analyzers:3.1.0') { transitive = false }
compile('org.apache.lucene:lucene-queries:3.1.0') { transitive = false }
compile('org.apache.lucene:lucene-memory:3.1.0') { transitive = false }
compile('org.apache.lucene:lucene-highlighter:3.1.0') { transitive = false }
compile('org.apache.lucene:lucene-core:3.2.0') { transitive = false }
compile('org.apache.lucene:lucene-analyzers:3.2.0') { transitive = false }
compile('org.apache.lucene:lucene-queries:3.2.0') { transitive = false }
compile('org.apache.lucene:lucene-memory:3.2.0') { transitive = false }
compile('org.apache.lucene:lucene-highlighter:3.2.0') { transitive = false }
}
configurations {

View File

@@ -19,7 +19,6 @@
package org.elasticsearch.common.bloom;
import org.apache.lucene.util.OpenBitSet;
import org.elasticsearch.common.UUID;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
@@ -66,9 +65,8 @@ public class BloomFilterFactory {
return new ObsBloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
}
private static OpenBitSet bucketsFor(long numElements, int bucketsPer) {
long numBits = numElements * bucketsPer + EXCESS; //TODO overflow?
return new OpenBitSet((long) Math.min(Long.MAX_VALUE, numBits));
private static long bucketsFor(long numElements, int bucketsPer) {
return numElements * bucketsPer + EXCESS;
}
public static void main(String[] args) throws UnsupportedEncodingException {

View File

@@ -29,10 +29,12 @@ public class ObsBloomFilter implements BloomFilter {
private final int hashCount;
private final OpenBitSet bitset;
private final long size;
ObsBloomFilter(int hashCount, OpenBitSet bs) {
ObsBloomFilter(int hashCount, long size) {
this.hashCount = hashCount;
this.bitset = bs;
this.bitset = new OpenBitSet(size);
this.size = size;
}
long emptyBuckets() {
@@ -46,7 +48,7 @@ public class ObsBloomFilter implements BloomFilter {
}
private long buckets() {
return bitset.size();
return size;
}
private long[] getHashBuckets(ByteBuffer key) {

View File

@@ -36,6 +36,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
private final char delimiter;
private final char replacement;
private final int skip;
@Inject public PathHierarchyTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
@@ -57,9 +58,10 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
} else {
this.replacement = replacement.charAt(0);
}
this.skip = settings.getAsInt("skip", PathHierarchyTokenizer.DEFAULT_SKIP);
}
@Override public Tokenizer create(Reader reader) {
return new PathHierarchyTokenizer(reader, bufferSize, delimiter, replacement);
return new PathHierarchyTokenizer(reader, bufferSize, delimiter, replacement, skip);
}
}

View File

@@ -19,8 +19,17 @@
package org.elasticsearch.deps.lucene;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
@@ -254,87 +263,6 @@ public class SimpleLuceneTests {
termDocs.next();
}
/**
* Verify doc freqs update with refresh of readers.
*/
@Test public void testTermEnumDocFreq() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
IndexReader reader = indexWriter.getReader();
Document doc = new Document();
doc.add(new Field("id", "1", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("value", "aaa", Field.Store.NO, Field.Index.ANALYZED));
indexWriter.addDocument(doc);
reader = refreshReader(reader);
TermEnum termEnum = reader.terms(new Term("value", ""));
assertThat(termEnum.term().text(), equalTo("aaa"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.close();
doc = new Document();
doc.add(new Field("id", "2", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("value", "bbb bbb", Field.Store.NO, Field.Index.ANALYZED));
indexWriter.addDocument(doc);
reader = refreshReader(reader);
termEnum = reader.terms(new Term("value", ""));
assertThat(termEnum.term().text(), equalTo("aaa"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.next();
assertThat(termEnum.term().text(), equalTo("bbb"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.close();
doc = new Document();
doc.add(new Field("id", "3", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("value", "bbb", Field.Store.NO, Field.Index.ANALYZED));
indexWriter.addDocument(doc);
reader = refreshReader(reader);
termEnum = reader.terms(new Term("value", ""));
assertThat(termEnum.term().text(), equalTo("aaa"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.next();
assertThat(termEnum.term().text(), equalTo("bbb"));
assertThat(termEnum.docFreq(), equalTo(2));
termEnum.close();
indexWriter.deleteDocuments(new Term("id", "3"));
reader = refreshReader(reader);
// won't see the changes until optimize
termEnum = reader.terms(new Term("value", ""));
assertThat(termEnum.term().text(), equalTo("aaa"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.next();
assertThat(termEnum.term().text(), equalTo("bbb"));
assertThat(termEnum.docFreq(), equalTo(2));
termEnum.close();
indexWriter.expungeDeletes();
reader = refreshReader(reader);
termEnum = reader.terms(new Term("value", ""));
assertThat(termEnum.term().text(), equalTo("aaa"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.next();
assertThat(termEnum.term().text(), equalTo("bbb"));
assertThat(termEnum.docFreq(), equalTo(1));
termEnum.close();
reader.close();
indexWriter.close();
}
/**
* A test just to verify that term freqs are not stored for numeric fields. <tt>int1</tt> is not storing termFreq
* and <tt>int2</tt> does.