From e04abc52e7cf632b6b03e81d80ab7cd54b24e9e1 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 5 Jul 2009 17:16:16 +0000 Subject: [PATCH] LUCENE-1599: Add clone support for SpanQuerys. SpanRegexQuery counts on this functionality and does not work correctly without it. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@791280 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 4 + .../search/regex/TestSpanRegexQuery.java | 93 ++++++++++++++++--- .../lucene/search/spans/SpanNearQuery.java | 13 ++- .../lucene/search/spans/SpanNotQuery.java | 5 +- .../lucene/search/spans/SpanOrQuery.java | 14 ++- 5 files changed, 114 insertions(+), 15 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5ff60ec89b1..5842bbea0ea 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -349,6 +349,10 @@ Bug fixes 16. LUCENE-1681: Fix infinite loop caused by a call to DocValues methods getMinValue, getMaxValue, getAverageValue. (Simon Willnauer via Mark Miller) +17. LUCENE-1599: Add clone support for SpanQuerys. SpanRegexQuery counts + on this functionality and does not work correctly without it. + (Mark Miller) + New features 1. LUCENE-1411: Added expert API to open an IndexWriter on a prior diff --git a/contrib/regex/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java b/contrib/regex/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java index a41745c517c..6321e6f51b8 100644 --- a/contrib/regex/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java +++ b/contrib/regex/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java @@ -17,32 +17,46 @@ package org.apache.lucene.search.regex; * limitations under the License. */ +import java.io.IOException; + import junit.framework.TestCase; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; + import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Hits; -import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiSearcher; +import org.apache.lucene.search.spans.SpanFirstQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanFirstQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.RAMDirectory; public class TestSpanRegexQuery extends TestCase { + Directory indexStoreA = new RAMDirectory(); + + Directory indexStoreB = new RAMDirectory(); + public void testSpanRegex() throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true); Document doc = new Document(); -// doc.add(new Field("field", "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED)); -// writer.addDocument(doc); -// doc = new Document(); - doc.add(new Field("field", "auto update", Field.Store.NO, Field.Index.ANALYZED)); + // doc.add(new Field("field", "the quick brown fox jumps over the lazy dog", + // Field.Store.NO, Field.Index.ANALYZED)); + // writer.addDocument(doc); + // doc = new Document(); + doc.add(new Field("field", "auto update", Field.Store.NO, + Field.Index.ANALYZED)); writer.addDocument(doc); doc = new Document(); - doc.add(new Field("field", "first auto update", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new Field("field", "first auto update", Field.Store.NO, + Field.Index.ANALYZED)); writer.addDocument(doc); writer.optimize(); writer.close(); @@ -50,8 +64,63 @@ public class TestSpanRegexQuery extends TestCase { IndexSearcher searcher = new IndexSearcher(directory); SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*")); SpanFirstQuery sfq = new SpanFirstQuery(srq, 1); -// SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6, true); + // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6, + // true); Hits hits = searcher.search(sfq); assertEquals(1, hits.length()); } + + public void testSpanRegexBug() throws CorruptIndexException, IOException { + createRAMDirectories(); + + SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "a.*")); + SpanRegexQuery stq = new SpanRegexQuery(new Term("field", "b.*")); + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6, + true); + + // 1. Search the same store which works + IndexSearcher[] arrSearcher = new IndexSearcher[2]; + arrSearcher[0] = new IndexSearcher(indexStoreA); + arrSearcher[1] = new IndexSearcher(indexStoreB); + MultiSearcher searcher = new MultiSearcher(arrSearcher); + Hits hits = searcher.search(query); + arrSearcher[0].close(); + arrSearcher[1].close(); + + // Will fail here + // We expect 2 but only one matched + // The rewriter function only write it once on the first IndexSearcher + // So it's using term: a1 b1 to search on the second IndexSearcher + // As a result, it won't match the document in the second IndexSearcher + assertEquals(2, hits.length()); + indexStoreA.close(); + indexStoreB.close(); + } + + private void createRAMDirectories() throws CorruptIndexException, + LockObtainFailedException, IOException { + // creating a document to store + Document lDoc = new Document(); + lDoc.add(new Field("field", "a1 b1", Field.Store.NO, + Field.Index.ANALYZED_NO_NORMS)); + + // creating a document to store + Document lDoc2 = new Document(); + lDoc2.add(new Field("field", "a2 b2", Field.Store.NO, + Field.Index.ANALYZED_NO_NORMS)); + + // creating first index writer + IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), + true, IndexWriter.MaxFieldLength.LIMITED); + writerA.addDocument(lDoc); + writerA.optimize(); + writerA.close(); + + // creating second index writer + IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), + true, IndexWriter.MaxFieldLength.LIMITED); + writerB.addDocument(lDoc2); + writerB.optimize(); + writerB.close(); + } } diff --git a/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index c47f939b77f..462d78ec28d 100644 --- a/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -33,7 +33,7 @@ import org.apache.lucene.util.ToStringUtils; /** Matches spans which are near one another. One can specify slop, the * maximum number of intervening unmatched positions, as well as whether * matches are required to be in-order. */ -public class SpanNearQuery extends SpanQuery { +public class SpanNearQuery extends SpanQuery implements Cloneable { private List clauses; private int slop; private boolean inOrder; @@ -151,6 +151,17 @@ public class SpanNearQuery extends SpanQuery { return this; // no clauses rewrote } } + + public Object clone() { + int sz = clauses.size(); + SpanQuery[] newClauses = new SpanQuery[sz]; + + for (int i = 0; i < sz; i++) { + SpanQuery clause = (SpanQuery) clauses.get(i); + newClauses[i] = (SpanQuery) clause.clone(); + } + return new SpanNearQuery(newClauses, slop, inOrder); + } /** Returns true iff o is equal to this. */ public boolean equals(Object o) { diff --git a/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 315ca7a918d..bb473294385 100644 --- a/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -27,7 +27,7 @@ import java.util.Collection; import java.util.Set; /** Removes matches which overlap with another SpanQuery. */ -public class SpanNotQuery extends SpanQuery { +public class SpanNotQuery extends SpanQuery implements Cloneable { private SpanQuery include; private SpanQuery exclude; @@ -68,6 +68,9 @@ public class SpanNotQuery extends SpanQuery { return buffer.toString(); } + public Object clone() { + return new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone()); + } public Spans getSpans(final IndexReader reader) throws IOException { return new PayloadSpans() { diff --git a/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 6564e7237ca..908341304e3 100644 --- a/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -31,7 +31,7 @@ import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.search.Query; /** Matches the union of its clauses.*/ -public class SpanOrQuery extends SpanQuery { +public class SpanOrQuery extends SpanQuery implements Cloneable { private List clauses; private String field; @@ -79,6 +79,18 @@ public class SpanOrQuery extends SpanQuery { clause.extractTerms(terms); } } + + public Object clone() { + int sz = clauses.size(); + SpanQuery[] newClauses = new SpanQuery[sz]; + + for (int i = 0; i < sz; i++) { + SpanQuery clause = (SpanQuery) clauses.get(i); + newClauses[i] = (SpanQuery) clause.clone(); + } + SpanOrQuery soq = new SpanOrQuery(newClauses); + return soq; + } public Query rewrite(IndexReader reader) throws IOException { SpanOrQuery clone = null;