SOLR-1256: Show the output of CharFilters in analysis.jsp

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@792370 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2009-07-09 01:35:30 +00:00
parent 4bd75f0221
commit 1ed4c39835
6 changed files with 88 additions and 15 deletions

View File

@ -247,6 +247,9 @@ New Features
This is an advanced debug log file that can be used to aid developers in fixing IndexWriter bugs. See the commented This is an advanced debug log file that can be used to aid developers in fixing IndexWriter bugs. See the commented
out example in the example solrconfig.xml under the indexDefaults section. out example in the example solrconfig.xml under the indexDefaults section.
(Chris Harris, Mark Miller) (Chris Harris, Mark Miller)
64. SOLR-1256: Show the output of CharFilters in analysis.jsp. (koji)
Optimizations Optimizations
---------------------- ----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

View File

@ -60,4 +60,16 @@ public abstract class CharFilter extends CharStream {
public int read(char[] cbuf, int off, int len) throws IOException { public int read(char[] cbuf, int off, int len) throws IOException {
return input.read(cbuf, off, len); return input.read(cbuf, off, len);
} }
public boolean markSupported(){
return input.markSupported();
}
public void mark( int readAheadLimit ) throws IOException {
input.mark(readAheadLimit);
}
public void reset() throws IOException {
input.reset();
}
} }

View File

@ -54,4 +54,16 @@ public final class CharReader extends CharStream {
public int read(char[] cbuf, int off, int len) throws IOException { public int read(char[] cbuf, int off, int len) throws IOException {
return input.read(cbuf, off, len ); return input.read(cbuf, off, len );
} }
public boolean markSupported(){
return input.markSupported();
}
public void mark( int readAheadLimit ) throws IOException {
input.mark(readAheadLimit);
}
public void reset() throws IOException {
input.reset();
}
} }

View File

@ -120,16 +120,4 @@ public class MappingCharFilter extends BaseCharFilter {
} }
return l == 0 ? -1 : l; return l == 0 ? -1 : l;
} }
public boolean markSupported(){
return false;
}
public void mark( int readAheadLimit ) throws IOException {
throw new IOException( "mark/reset not supported" );
}
public void reset() throws IOException {
throw new IOException( "mark/reset not supported" );
}
} }

View File

@ -42,6 +42,22 @@ public class TestMappingCharFilter extends BaseTokenTestCase {
normMap.add( "empty", "" ); normMap.add( "empty", "" );
} }
public void testReaderReset() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
char[] buf = new char[10];
int len = cs.read(buf, 0, 10);
assertEquals( 1, len );
assertEquals( 'x', buf[0]) ;
len = cs.read(buf, 0, 10);
assertEquals( -1, len );
// rewind
cs.reset();
len = cs.read(buf, 0, 10);
assertEquals( 1, len );
assertEquals( 'x', buf[0]) ;
}
public void testNothingChange() throws Exception { public void testNothingChange() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) ); CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs ); TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs );

View File

@ -19,6 +19,9 @@
org.apache.lucene.analysis.Token, org.apache.lucene.analysis.Token,
org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.TokenStream,
org.apache.lucene.index.Payload, org.apache.lucene.index.Payload,
org.apache.solr.analysis.CharReader,
org.apache.solr.analysis.CharStream,
org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain, org.apache.solr.analysis.TokenizerChain,
org.apache.solr.analysis.TokenizerFactory, org.apache.solr.analysis.TokenizerFactory,
@ -171,19 +174,32 @@
<%! <%!
private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception { private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
Reader reader = new StringReader(val); CharStream reader = CharReader.get(new StringReader(val));
FieldType ft = field.getType(); FieldType ft = field.getType();
Analyzer analyzer = queryAnalyser ? Analyzer analyzer = queryAnalyser ?
ft.getQueryAnalyzer() : ft.getAnalyzer(); ft.getQueryAnalyzer() : ft.getAnalyzer();
if (analyzer instanceof TokenizerChain) { if (analyzer instanceof TokenizerChain) {
TokenizerChain tchain = (TokenizerChain)analyzer; TokenizerChain tchain = (TokenizerChain)analyzer;
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
TokenizerFactory tfac = tchain.getTokenizerFactory(); TokenizerFactory tfac = tchain.getTokenizerFactory();
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories(); TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
TokenStream tstream = tfac.create(tchain.charStream(reader)); if( cfiltfacs != null ){
for(CharFilterFactory cfiltfac : cfiltfacs ){
reader = cfiltfac.create(reader);
if(verbose){
writeHeader(out, cfiltfac.getClass(), cfiltfac.getArgs());
writeCharStream(out, reader);
}
}
}
// StringReader should support reset()
reader.reset();
TokenStream tstream = tfac.create(reader);
List<Token> tokens = getTokens(tstream); List<Token> tokens = getTokens(tstream);
tstream = tfac.create(tchain.charStream(reader)); tstream = tfac.create(reader);
if (verbose) { if (verbose) {
writeHeader(out, tfac.getClass(), tfac.getArgs()); writeHeader(out, tfac.getClass(), tfac.getArgs());
} }
@ -453,4 +469,30 @@
out.println("</table>"); out.println("</table>");
} }
static void writeCharStream(JspWriter out, CharStream input) throws IOException {
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
out.println("<tr>");
out.print("<th NOWRAP>");
XML.escapeCharData("text",out);
out.println("</th>");
// StringReader should support reset()
input.reset();
final int BUFFER_SIZE = 1024;
char[] buf = new char[BUFFER_SIZE];
int len = 0;
StringBuilder sb = new StringBuilder();
do {
len = input.read( buf, 0, BUFFER_SIZE );
sb.append(buf, 0, len);
} while( len == BUFFER_SIZE );
out.print("<td class=\"debugdata\">");
XML.escapeCharData(sb.toString(),out);
out.println("</td>");
out.println("</tr>");
out.println("</table>");
}
%> %>