mirror of https://github.com/apache/lucene.git
LUCENE-1843: Convert some tests to new TokenStream API, better support of cross-impl AttributeImpl.copyTo()
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@806847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
23d976abbd
commit
426ffd809e
|
@ -859,11 +859,20 @@ public class Token extends AttributeImpl
|
||||||
}
|
}
|
||||||
|
|
||||||
public void copyTo(AttributeImpl target) {
|
public void copyTo(AttributeImpl target) {
|
||||||
Token to = (Token) target;
|
if (target instanceof Token) {
|
||||||
to.reinit(this);
|
final Token to = (Token) target;
|
||||||
// reinit shares the payload, so clone it:
|
to.reinit(this);
|
||||||
if (payload !=null) {
|
// reinit shares the payload, so clone it:
|
||||||
to.payload = (Payload) payload.clone();
|
if (payload !=null) {
|
||||||
|
to.payload = (Payload) payload.clone();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
((TermAttribute) target).setTermBuffer(termBuffer, 0, termLength);
|
||||||
|
((OffsetAttribute) target).setOffset(startOffset, endOffset);
|
||||||
|
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
|
||||||
|
((PayloadAttribute) target).setPayload((payload == null) ? null : (Payload) payload.clone());
|
||||||
|
((FlagsAttribute) target).setFlags(flags);
|
||||||
|
((TypeAttribute) target).setType(type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -157,6 +157,10 @@ final class TokenWrapper extends AttributeImpl
|
||||||
}
|
}
|
||||||
|
|
||||||
public void copyTo(AttributeImpl target) {
|
public void copyTo(AttributeImpl target) {
|
||||||
((TokenWrapper) target).delegate = (Token) this.delegate.clone();
|
if (target instanceof TokenWrapper) {
|
||||||
|
((TokenWrapper) target).delegate = (Token) this.delegate.clone();
|
||||||
|
} else {
|
||||||
|
this.delegate.copyTo(target);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,8 +101,8 @@ public abstract class AttributeImpl implements Cloneable, Serializable, Attribut
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copies the values from this Attribute into the passed-in
|
* Copies the values from this Attribute into the passed-in
|
||||||
* target attribute. The type of the target must match the type
|
* target attribute. The target implementation must support all the
|
||||||
* of this attribute.
|
* Attributes this implementation supports.
|
||||||
*/
|
*/
|
||||||
public abstract void copyTo(AttributeImpl target);
|
public abstract void copyTo(AttributeImpl target);
|
||||||
|
|
||||||
|
|
|
@ -19,35 +19,25 @@ package org.apache.lucene.analysis;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
|
||||||
|
public abstract class BaseTokenTestCase extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
/* TODO: Convert to new TokenStream API. Token instances must be removed for that to work */
|
|
||||||
public abstract class BaseTokenTestCase extends LuceneTestCase {
|
|
||||||
public static String tsToString(TokenStream in) throws IOException {
|
public static String tsToString(TokenStream in) throws IOException {
|
||||||
StringBuffer out = new StringBuffer();
|
final TermAttribute termAtt = (TermAttribute) in.addAttribute(TermAttribute.class);
|
||||||
Token t = in.next();
|
final StringBuffer out = new StringBuffer();
|
||||||
if (null != t)
|
in.reset();
|
||||||
out.append(new String(t.termBuffer(), 0, t.termLength()));
|
while (in.incrementToken()) {
|
||||||
|
if (out.length()>0) out.append(' ');
|
||||||
for (t = in.next(); null != t; t = in.next()) {
|
out.append(termAtt.term());
|
||||||
out.append(" ").append(new String(t.termBuffer(), 0, t.termLength()));
|
|
||||||
}
|
}
|
||||||
in.close();
|
in.close();
|
||||||
return out.toString();
|
return out.toString();
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
public List<String> tok2str(Iterable<Token> tokLst) {
|
|
||||||
ArrayList<String> lst = new ArrayList<String>();
|
|
||||||
for ( Token t : tokLst ) {
|
|
||||||
lst.add( new String(t.termBuffer(), 0, t.termLength()));
|
|
||||||
}
|
|
||||||
return lst;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
public void assertTokEqual(List/*<Token>*/ a, List/*<Token>*/ b) {
|
public void assertTokEqual(List/*<Token>*/ a, List/*<Token>*/ b) {
|
||||||
assertTokEq(a,b,false);
|
assertTokEq(a,b,false);
|
||||||
|
@ -64,7 +54,7 @@ public abstract class BaseTokenTestCase extends LuceneTestCase {
|
||||||
for (Iterator iter = a.iterator(); iter.hasNext();) {
|
for (Iterator iter = a.iterator(); iter.hasNext();) {
|
||||||
Token tok = (Token)iter.next();
|
Token tok = (Token)iter.next();
|
||||||
pos += tok.getPositionIncrement();
|
pos += tok.getPositionIncrement();
|
||||||
if (!tokAt(b, new String(tok.termBuffer(), 0, tok.termLength()), pos
|
if (!tokAt(b, tok.term(), pos
|
||||||
, checkOff ? tok.startOffset() : -1
|
, checkOff ? tok.startOffset() : -1
|
||||||
, checkOff ? tok.endOffset() : -1
|
, checkOff ? tok.endOffset() : -1
|
||||||
))
|
))
|
||||||
|
@ -79,7 +69,7 @@ public abstract class BaseTokenTestCase extends LuceneTestCase {
|
||||||
for (Iterator iter = lst.iterator(); iter.hasNext();) {
|
for (Iterator iter = lst.iterator(); iter.hasNext();) {
|
||||||
Token tok = (Token)iter.next();
|
Token tok = (Token)iter.next();
|
||||||
pos += tok.getPositionIncrement();
|
pos += tok.getPositionIncrement();
|
||||||
if (pos==tokPos && new String(tok.termBuffer(), 0, tok.termLength()).equals(val)
|
if (pos==tokPos && tok.term().equals(val)
|
||||||
&& (startOff==-1 || tok.startOffset()==startOff)
|
&& (startOff==-1 || tok.startOffset()==startOff)
|
||||||
&& (endOff ==-1 || tok.endOffset() ==endOff )
|
&& (endOff ==-1 || tok.endOffset() ==endOff )
|
||||||
)
|
)
|
||||||
|
@ -146,41 +136,22 @@ public abstract class BaseTokenTestCase extends LuceneTestCase {
|
||||||
|
|
||||||
static List/*<Token>*/ getTokens(TokenStream tstream) throws IOException {
|
static List/*<Token>*/ getTokens(TokenStream tstream) throws IOException {
|
||||||
List/*<Token>*/ tokens = new ArrayList/*<Token>*/();
|
List/*<Token>*/ tokens = new ArrayList/*<Token>*/();
|
||||||
while (true) {
|
tstream.reset();
|
||||||
Token t = tstream.next();
|
while (tstream.incrementToken()) {
|
||||||
if (t==null) break;
|
final Token t = new Token();
|
||||||
|
for (Iterator it = tstream.getAttributeImplsIterator(); it.hasNext();) {
|
||||||
|
final AttributeImpl att = (AttributeImpl) it.next();
|
||||||
|
try {
|
||||||
|
att.copyTo(t);
|
||||||
|
} catch (ClassCastException ce) {
|
||||||
|
// ignore Attributes unsupported by Token
|
||||||
|
}
|
||||||
|
}
|
||||||
tokens.add(t);
|
tokens.add(t);
|
||||||
}
|
}
|
||||||
|
tstream.close();
|
||||||
|
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
public static class IterTokenStream extends TokenStream {
|
|
||||||
Iterator<Token> toks;
|
|
||||||
public IterTokenStream(Token... toks) {
|
|
||||||
this.toks = Arrays.asList(toks).iterator();
|
|
||||||
}
|
|
||||||
public IterTokenStream(Iterable<Token> toks) {
|
|
||||||
this.toks = toks.iterator();
|
|
||||||
}
|
|
||||||
public IterTokenStream(Iterator<Token> toks) {
|
|
||||||
this.toks = toks;
|
|
||||||
}
|
|
||||||
public IterTokenStream(String ... text) {
|
|
||||||
int off = 0;
|
|
||||||
ArrayList<Token> t = new ArrayList<Token>( text.length );
|
|
||||||
for( String txt : text ) {
|
|
||||||
t.add( new Token( txt, off, off+txt.length() ) );
|
|
||||||
off += txt.length() + 2;
|
|
||||||
}
|
|
||||||
this.toks = t.iterator();
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public Token next() {
|
|
||||||
if (toks.hasNext()) {
|
|
||||||
return toks.next();
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,21 +29,23 @@ import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||||
import org.apache.lucene.analysis.SinkTokenizer;
|
|
||||||
import org.apache.lucene.analysis.StopAnalyzer;
|
import org.apache.lucene.analysis.StopAnalyzer;
|
||||||
import org.apache.lucene.analysis.TeeSinkTokenFilter;
|
import org.apache.lucene.analysis.TeeSinkTokenFilter;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
@ -62,7 +64,6 @@ import org.apache.lucene.store.LockFactory;
|
||||||
import org.apache.lucene.store.MockRAMDirectory;
|
import org.apache.lucene.store.MockRAMDirectory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.store.SingleInstanceLockFactory;
|
import org.apache.lucene.store.SingleInstanceLockFactory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
|
@ -70,8 +71,16 @@ import org.apache.lucene.util._TestUtil;
|
||||||
*
|
*
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
public class TestIndexWriter extends LuceneTestCase
|
public class TestIndexWriter extends BaseTokenStreamTestCase {
|
||||||
{
|
public TestIndexWriter(String name) {
|
||||||
|
super(name, new HashSet(Arrays.asList(new String[]{
|
||||||
|
"testExceptionFromTokenStream", "testDocumentsWriterExceptions", "testNegativePositions",
|
||||||
|
"testEndOffsetPositionWithCachingTokenFilter", "testEndOffsetPositionWithTeeSinkTokenFilter",
|
||||||
|
"testEndOffsetPositionStandard", "testEndOffsetPositionStandardEmptyField",
|
||||||
|
"testEndOffsetPositionStandardEmptyField2"
|
||||||
|
})));
|
||||||
|
}
|
||||||
|
|
||||||
public void testDocCount() throws IOException
|
public void testDocCount() throws IOException
|
||||||
{
|
{
|
||||||
Directory dir = new RAMDirectory();
|
Directory dir = new RAMDirectory();
|
||||||
|
@ -3530,16 +3539,22 @@ public class TestIndexWriter extends LuceneTestCase
|
||||||
|
|
||||||
// LUCENE-1255
|
// LUCENE-1255
|
||||||
public void testNegativePositions() throws Throwable {
|
public void testNegativePositions() throws Throwable {
|
||||||
SinkTokenizer tokens = new SinkTokenizer();
|
final TokenStream tokens = new TokenStream() {
|
||||||
Token t = new Token();
|
final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||||
t.setTermBuffer("a");
|
final PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||||
t.setPositionIncrement(0);
|
|
||||||
tokens.add(t);
|
final Iterator tokens = Arrays.asList(new String[]{"a","b","c"}).iterator();
|
||||||
t.setTermBuffer("b");
|
boolean first = true;
|
||||||
t.setPositionIncrement(1);
|
|
||||||
tokens.add(t);
|
public boolean incrementToken() {
|
||||||
t.setTermBuffer("c");
|
if (!tokens.hasNext()) return false;
|
||||||
tokens.add(t);
|
clearAttributes();
|
||||||
|
termAtt.setTermBuffer((String) tokens.next());
|
||||||
|
posIncrAtt.setPositionIncrement(first ? 0 : 1);
|
||||||
|
first = false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
MockRAMDirectory dir = new MockRAMDirectory();
|
MockRAMDirectory dir = new MockRAMDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
|
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||||
|
|
Loading…
Reference in New Issue