diff --git a/src/java/org/apache/lucene/analysis/Token.java b/src/java/org/apache/lucene/analysis/Token.java
index 48fcb854579..1228fa8e436 100644
--- a/src/java/org/apache/lucene/analysis/Token.java
+++ b/src/java/org/apache/lucene/analysis/Token.java
@@ -859,11 +859,20 @@ public class Token extends AttributeImpl
   }
 
   public void copyTo(AttributeImpl target) {
-    Token to = (Token) target;
-    to.reinit(this);
-    // reinit shares the payload, so clone it:
-    if (payload !=null) {
-      to.payload = (Payload) payload.clone();
+    if (target instanceof Token) {
+      final Token to = (Token) target;
+      to.reinit(this);
+      // reinit shares the payload, so clone it:
+      if (payload !=null) {
+        to.payload = (Payload) payload.clone();
+      }
+    } else {
+      ((TermAttribute) target).setTermBuffer(termBuffer, 0, termLength);
+      ((OffsetAttribute) target).setOffset(startOffset, endOffset);
+      ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
+      ((PayloadAttribute) target).setPayload((payload == null) ? null : (Payload) payload.clone());
+      ((FlagsAttribute) target).setFlags(flags);
+      ((TypeAttribute) target).setType(type);
     }
   }
 }
diff --git a/src/java/org/apache/lucene/analysis/TokenWrapper.java b/src/java/org/apache/lucene/analysis/TokenWrapper.java
index 0428e22b3d0..0e667f219fd 100644
--- a/src/java/org/apache/lucene/analysis/TokenWrapper.java
+++ b/src/java/org/apache/lucene/analysis/TokenWrapper.java
@@ -157,6 +157,10 @@ final class TokenWrapper extends AttributeImpl
   }
 
   public void copyTo(AttributeImpl target) {
-    ((TokenWrapper) target).delegate = (Token) this.delegate.clone();
+    if (target instanceof TokenWrapper) {
+      ((TokenWrapper) target).delegate = (Token) this.delegate.clone();
+    } else {
+      this.delegate.copyTo(target);
+    }
   }
 }
diff --git a/src/java/org/apache/lucene/util/AttributeImpl.java b/src/java/org/apache/lucene/util/AttributeImpl.java
index d6a4d7bfda1..d8f456f3285 100644
--- a/src/java/org/apache/lucene/util/AttributeImpl.java
+++ b/src/java/org/apache/lucene/util/AttributeImpl.java
@@ -101,8 +101,8 @@ public abstract class AttributeImpl implements Cloneable, Serializable, Attribute
 
   /**
    * Copies the values from this Attribute into the passed-in
-   * target attribute. The type of the target must match the type
-   * of this attribute.
+   * target attribute. The target implementation must support all the
+   * Attributes this implementation supports.
    */
   public abstract void copyTo(AttributeImpl target);
 
diff --git a/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java b/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java
index 816d7615276..d61482e9cc1 100644
--- a/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java
+++ b/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java
@@ -19,35 +19,25 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+public abstract class BaseTokenTestCase extends BaseTokenStreamTestCase {
 
-/* TODO: Convert to new TokenStream API. Token instances must be removed for that to work */
-public abstract class BaseTokenTestCase extends LuceneTestCase {
   public static String tsToString(TokenStream in) throws IOException {
-    StringBuffer out = new StringBuffer();
-    Token t = in.next();
-    if (null != t)
-      out.append(new String(t.termBuffer(), 0, t.termLength()));
-
-    for (t = in.next(); null != t; t = in.next()) {
-      out.append(" ").append(new String(t.termBuffer(), 0, t.termLength()));
+    final TermAttribute termAtt = (TermAttribute) in.addAttribute(TermAttribute.class);
+    final StringBuffer out = new StringBuffer();
+    in.reset();
+    while (in.incrementToken()) {
+      if (out.length()>0) out.append(' ');
+      out.append(termAtt.term());
     }
     in.close();
     return out.toString();
   }
 
-/*
-  public List tok2str(Iterable tokLst) {
-    ArrayList lst = new ArrayList();
-    for ( Token t : tokLst ) {
-      lst.add( new String(t.termBuffer(), 0, t.termLength()));
-    }
-    return lst;
-  }
-*/
 
   public void assertTokEqual(List/**/ a, List/**/ b) {
     assertTokEq(a,b,false);
@@ -64,7 +54,7 @@ public abstract class BaseTokenTestCase extends LuceneTestCase {
     for (Iterator iter = a.iterator(); iter.hasNext();) {
       Token tok = (Token)iter.next();
       pos += tok.getPositionIncrement();
-      if (!tokAt(b, new String(tok.termBuffer(), 0, tok.termLength()), pos
+      if (!tokAt(b, tok.term(), pos
                 , checkOff ? tok.startOffset() : -1
                 , checkOff ? tok.endOffset() : -1
                 ))
@@ -79,7 +69,7 @@ public abstract class BaseTokenTestCase extends LuceneTestCase {
     for (Iterator iter = lst.iterator(); iter.hasNext();) {
       Token tok = (Token)iter.next();
       pos += tok.getPositionIncrement();
-      if (pos==tokPos && new String(tok.termBuffer(), 0, tok.termLength()).equals(val)
+      if (pos==tokPos && tok.term().equals(val)
          && (startOff==-1 || tok.startOffset()==startOff)
          && (endOff ==-1 || tok.endOffset() ==endOff  )
         )
@@ -146,41 +136,22 @@ public abstract class BaseTokenTestCase extends LuceneTestCase {
 
   static List/**/ getTokens(TokenStream tstream) throws IOException {
     List/**/ tokens = new ArrayList/**/();
-    while (true) {
-      Token t = tstream.next();
-      if (t==null) break;
+    tstream.reset();
+    while (tstream.incrementToken()) {
+      final Token t = new Token();
+      for (Iterator it = tstream.getAttributeImplsIterator(); it.hasNext();) {
+        final AttributeImpl att = (AttributeImpl) it.next();
+        try {
+          att.copyTo(t);
+        } catch (ClassCastException ce) {
+          // ignore Attributes unsupported by Token
+        }
+      }
       tokens.add(t);
     }
+    tstream.close();
+    return tokens;
   }
 
-/*
-  public static class IterTokenStream extends TokenStream {
-    Iterator toks;
-    public IterTokenStream(Token... toks) {
-      this.toks = Arrays.asList(toks).iterator();
-    }
-    public IterTokenStream(Iterable toks) {
-      this.toks = toks.iterator();
-    }
-    public IterTokenStream(Iterator toks) {
-      this.toks = toks;
-    }
-    public IterTokenStream(String ... text) {
-      int off = 0;
-      ArrayList t = new ArrayList( text.length );
-      for( String txt : text ) {
-        t.add( new Token( txt, off, off+txt.length() ) );
-        off += txt.length() + 2;
-      }
-      this.toks = t.iterator();
-    }
-    @Override
-    public Token next() {
-      if (toks.hasNext()) {
-        return toks.next();
-      }
-      return null;
-    }
-  }
-*/
+
 }
diff --git a/src/test/org/apache/lucene/index/TestIndexWriter.java b/src/test/org/apache/lucene/index/TestIndexWriter.java
index 46156f04c9f..aa1c019e7d5 100644
--- a/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -29,21 +29,23 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.HashSet;
 import java.util.Random;
 
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.SinkTokenizer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TeeSinkTokenFilter;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.IndexSearcher;
@@ -62,7 +64,6 @@ import org.apache.lucene.store.LockFactory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.SingleInstanceLockFactory;
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
 
@@ -70,8 +71,16 @@ import org.apache.lucene.util._TestUtil;
  *
  * @version $Id$
  */
-public class TestIndexWriter extends LuceneTestCase
-{
+public class TestIndexWriter extends BaseTokenStreamTestCase {
+  public TestIndexWriter(String name) {
+    super(name, new HashSet(Arrays.asList(new String[]{
+      "testExceptionFromTokenStream", "testDocumentsWriterExceptions", "testNegativePositions",
+      "testEndOffsetPositionWithCachingTokenFilter", "testEndOffsetPositionWithTeeSinkTokenFilter",
+      "testEndOffsetPositionStandard", "testEndOffsetPositionStandardEmptyField",
+      "testEndOffsetPositionStandardEmptyField2"
+    })));
+  }
+
     public void testDocCount() throws IOException
     {
         Directory dir = new RAMDirectory();
@@ -3530,16 +3539,22 @@ public class TestIndexWriter extends LuceneTestCase
 
   // LUCENE-1255
   public void testNegativePositions() throws Throwable {
-    SinkTokenizer tokens = new SinkTokenizer();
-    Token t = new Token();
-    t.setTermBuffer("a");
-    t.setPositionIncrement(0);
-    tokens.add(t);
-    t.setTermBuffer("b");
-    t.setPositionIncrement(1);
-    tokens.add(t);
-    t.setTermBuffer("c");
-    tokens.add(t);
+    final TokenStream tokens = new TokenStream() {
+      final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+      final PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+
+      final Iterator tokens = Arrays.asList(new String[]{"a","b","c"}).iterator();
+      boolean first = true;
+
+      public boolean incrementToken() {
+        if (!tokens.hasNext()) return false;
+        clearAttributes();
+        termAtt.setTermBuffer((String) tokens.next());
+        posIncrAtt.setPositionIncrement(first ? 0 : 1);
+        first = false;
+        return true;
+      }
+    };
 
     MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                     IndexWriter.MaxFieldLength.UNLIMITED);