mirror of https://github.com/apache/lucene.git
add RawTFSimilarity class (#13749)
This commit is contained in:
parent
a4c79c8d30
commit
a817426511
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
||||
/** Similarity that returns the raw TF as score. */
|
||||
public class RawTFSimilarity extends Similarity {
|
||||
|
||||
/** Default constructor: parameter-free */
|
||||
public RawTFSimilarity() {
|
||||
super();
|
||||
}
|
||||
|
||||
/** Primary constructor. */
|
||||
public RawTFSimilarity(boolean discountOverlaps) {
|
||||
super(discountOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer scorer(
|
||||
float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return new SimScorer() {
|
||||
@Override
|
||||
public float score(float freq, long norm) {
|
||||
return boost * freq;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -29,14 +29,13 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.RawTFSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
|
@ -75,7 +74,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
|
|||
searcher = newSearcher(reader, true, false);
|
||||
searcher.setSimilarity(new ClassicSimilarity());
|
||||
scorerSearcher = new ScorerIndexSearcher(reader);
|
||||
scorerSearcher.setSimilarity(new CountingSimilarity());
|
||||
scorerSearcher.setSimilarity(new RawTFSimilarity());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -345,24 +344,4 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
|
|||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
// Similarity that just returns the frequency as the score
|
||||
private static class CountingSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer scorer(
|
||||
float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return new SimScorer() {
|
||||
@Override
|
||||
public float score(float freq, long norm) {
|
||||
return freq;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,12 +29,11 @@ import org.apache.lucene.document.Field.Store;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.RawTFSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
|
@ -67,7 +66,7 @@ public class TestConjunctions extends LuceneTestCase {
|
|||
reader = writer.getReader();
|
||||
writer.close();
|
||||
searcher = newSearcher(reader);
|
||||
searcher.setSimilarity(new TFSimilarity());
|
||||
searcher.setSimilarity(new RawTFSimilarity());
|
||||
}
|
||||
|
||||
static Document doc(String v1, String v2) {
|
||||
|
@ -93,26 +92,6 @@ public class TestConjunctions extends LuceneTestCase {
|
|||
super.tearDown();
|
||||
}
|
||||
|
||||
// Similarity that returns the TF as score
|
||||
private static class TFSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
return 1; // we dont care
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer scorer(
|
||||
float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return new SimScorer() {
|
||||
@Override
|
||||
public float score(float freq, long norm) {
|
||||
return freq;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
public void testScorerGetChildren() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.similarities;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.search.similarities.BaseSimilarityTestCase;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
public class TestRawTFSimilarity extends BaseSimilarityTestCase {
|
||||
|
||||
private Directory directory;
|
||||
private IndexReader indexReader;
|
||||
private IndexSearcher indexSearcher;
|
||||
|
||||
@Override
|
||||
protected Similarity getSimilarity(Random random) {
|
||||
return new RawTFSimilarity();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
directory = newDirectory();
|
||||
try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig())) {
|
||||
final Document document1 = new Document();
|
||||
final Document document2 = new Document();
|
||||
final Document document3 = new Document();
|
||||
document1.add(LuceneTestCase.newTextField("test", "one", Field.Store.YES));
|
||||
document2.add(LuceneTestCase.newTextField("test", "two two", Field.Store.YES));
|
||||
document3.add(LuceneTestCase.newTextField("test", "three three three", Field.Store.YES));
|
||||
indexWriter.addDocument(document1);
|
||||
indexWriter.addDocument(document2);
|
||||
indexWriter.addDocument(document3);
|
||||
indexWriter.commit();
|
||||
}
|
||||
indexReader = DirectoryReader.open(directory);
|
||||
indexSearcher = newSearcher(indexReader);
|
||||
indexSearcher.setSimilarity(new RawTFSimilarity());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
IOUtils.close(indexReader, directory);
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testOne() throws IOException {
|
||||
implTest("one", 1f);
|
||||
}
|
||||
|
||||
public void testTwo() throws IOException {
|
||||
implTest("two", 2f);
|
||||
}
|
||||
|
||||
public void testThree() throws IOException {
|
||||
implTest("three", 3f);
|
||||
}
|
||||
|
||||
private void implTest(String text, float expectedScore) throws IOException {
|
||||
Query query = new TermQuery(new Term("test", text));
|
||||
TopDocs topDocs = indexSearcher.search(query, 1);
|
||||
assertEquals(1, topDocs.totalHits.value());
|
||||
assertEquals(1, topDocs.scoreDocs.length);
|
||||
assertEquals(expectedScore, topDocs.scoreDocs[0].score, 0.0);
|
||||
}
|
||||
|
||||
public void testBoostQuery() throws IOException {
|
||||
Query query = new TermQuery(new Term("test", "three"));
|
||||
float boost = 14f;
|
||||
TopDocs topDocs = indexSearcher.search(new BoostQuery(query, boost), 1);
|
||||
assertEquals(1, topDocs.totalHits.value());
|
||||
assertEquals(1, topDocs.scoreDocs.length);
|
||||
assertEquals(42f, topDocs.scoreDocs[0].score, 0.0);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue