From 4db3e7b8a7ca818002af9041bf10660c25905915 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 13 Sep 2016 05:04:18 -0400 Subject: [PATCH] LUCENE-7439: improve test case --- .../apache/lucene/search/TestFuzzyQuery.java | 64 ++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java index a59449c4a17..1e90525d891 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java @@ -18,7 +18,9 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -523,6 +525,7 @@ public class TestFuzzyQuery extends LuceneTestCase { w.addDocument(doc); } DirectoryReader r = w.getReader(); + //System.out.println("TEST: reader=" + r); IndexSearcher s = newSearcher(r); int iters = atLeast(1000); for(int iter=0;iter[] expected = new Set[3]; + List[] expected = new List[3]; for(int ed=0;ed<3;ed++) { - expected[ed] = new HashSet(); + expected[ed] = new ArrayList(); } for(String term : terms) { if (term.startsWith(queryPrefix) == false) { continue; } int ed = getDistance(term, queryTerm); - if (Math.min(queryTerm.length(), term.length()) > ed) { + if (Math.min(queryTerm.length(), term.length()) > ed) { + float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length()); while (ed < 3) { - expected[ed].add(term); + expected[ed].add(new TermAndScore(term, score)); ed++; } } } for(int ed=0;ed<3;ed++) { - FuzzyQuery query = new FuzzyQuery(new Term("field", queryTerm), ed, prefixLength, terms.size(), true); + Collections.sort(expected[ed]); + int queueSize = TestUtil.nextInt(random(), 1, terms.size()); + /* + System.out.println("\nTEST: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength); + for(TermAndScore ent : expected[ed]) { + System.out.println(" " + ent); + } + */ + FuzzyQuery query = new FuzzyQuery(new Term("field", queryTerm), ed, prefixLength, queueSize, true); TopDocs hits = s.search(query, terms.size()); Set actual = new HashSet<>(); for(ScoreDoc hit : hits.scoreDocs) { Document doc = s.doc(hit.doc); actual.add(doc.get("field")); + //System.out.println(" actual: " + doc.get("field") + " score=" + hit.score); } - if (actual.equals(expected[ed]) == false) { + Set expectedTop = new HashSet<>(); + int limit = Math.min(queueSize, expected[ed].size()); + for(int i=0;i { + final String term; + final float score; + + public TermAndScore(String term, float score) { + this.term = term; + this.score = score; + } + + @Override + public int compareTo(TermAndScore other) { + // higher score sorts first, and if scores are tied, lower term sorts first + if (score > other.score) { + return -1; + } else if (score < other.score) { + return 1; + } else { + return term.compareTo(other.term); + } + } + + @Override + public String toString() { + return term + " score=" + score; + } + } + // Poached from LuceneLevenshteinDistance.java (from suggest module): it supports transpositions (treats them as ed=1, not ed=2) private static int getDistance(String target, String other) { IntsRef targetPoints;