From 1ed4c398353ac9f97e65239f81a948727bfe95c8 Mon Sep 17 00:00:00 2001 From: Koji Sekiguchi Date: Thu, 9 Jul 2009 01:35:30 +0000 Subject: [PATCH] SOLR-1256: Show the output of CharFilters in analysis.jsp git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@792370 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 ++ .../org/apache/solr/analysis/CharFilter.java | 12 +++++ .../org/apache/solr/analysis/CharReader.java | 12 +++++ .../solr/analysis/MappingCharFilter.java | 12 ----- .../solr/analysis/TestMappingCharFilter.java | 16 +++++++ src/webapp/web/admin/analysis.jsp | 48 +++++++++++++++++-- 6 files changed, 88 insertions(+), 15 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8034be98ac6..5f9a70bc9a0 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -247,6 +247,9 @@ New Features This is an advanced debug log file that can be used to aid developers in fixing IndexWriter bugs. See the commented out example in the example solrconfig.xml under the indexDefaults section. (Chris Harris, Mark Miller) + +64. SOLR-1256: Show the output of CharFilters in analysis.jsp. (koji) + Optimizations ---------------------- 1. 
SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the diff --git a/src/java/org/apache/solr/analysis/CharFilter.java b/src/java/org/apache/solr/analysis/CharFilter.java index 9616bcf9a9f..db22717b429 100644 --- a/src/java/org/apache/solr/analysis/CharFilter.java +++ b/src/java/org/apache/solr/analysis/CharFilter.java @@ -60,4 +60,16 @@ public abstract class CharFilter extends CharStream { public int read(char[] cbuf, int off, int len) throws IOException { return input.read(cbuf, off, len); } + + public boolean markSupported(){ + return input.markSupported(); + } + + public void mark( int readAheadLimit ) throws IOException { + input.mark(readAheadLimit); + } + + public void reset() throws IOException { + input.reset(); + } } diff --git a/src/java/org/apache/solr/analysis/CharReader.java b/src/java/org/apache/solr/analysis/CharReader.java index 7c164684889..99c67317346 100644 --- a/src/java/org/apache/solr/analysis/CharReader.java +++ b/src/java/org/apache/solr/analysis/CharReader.java @@ -54,4 +54,16 @@ public final class CharReader extends CharStream { public int read(char[] cbuf, int off, int len) throws IOException { return input.read(cbuf, off, len ); } + + public boolean markSupported(){ + return input.markSupported(); + } + + public void mark( int readAheadLimit ) throws IOException { + input.mark(readAheadLimit); + } + + public void reset() throws IOException { + input.reset(); + } } diff --git a/src/java/org/apache/solr/analysis/MappingCharFilter.java b/src/java/org/apache/solr/analysis/MappingCharFilter.java index bf8e54f3e81..e5114c391c5 100644 --- a/src/java/org/apache/solr/analysis/MappingCharFilter.java +++ b/src/java/org/apache/solr/analysis/MappingCharFilter.java @@ -120,16 +120,4 @@ public class MappingCharFilter extends BaseCharFilter { } return l == 0 ? 
-1 : l; } - - public boolean markSupported(){ - return false; - } - - public void mark( int readAheadLimit ) throws IOException { - throw new IOException( "mark/reset not supported" ); - } - - public void reset() throws IOException { - throw new IOException( "mark/reset not supported" ); - } } diff --git a/src/test/org/apache/solr/analysis/TestMappingCharFilter.java b/src/test/org/apache/solr/analysis/TestMappingCharFilter.java index 3374348bad8..008261be300 100644 --- a/src/test/org/apache/solr/analysis/TestMappingCharFilter.java +++ b/src/test/org/apache/solr/analysis/TestMappingCharFilter.java @@ -42,6 +42,22 @@ public class TestMappingCharFilter extends BaseTokenTestCase { normMap.add( "empty", "" ); } + public void testReaderReset() throws Exception { + CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) ); + char[] buf = new char[10]; + int len = cs.read(buf, 0, 10); + assertEquals( 1, len ); + assertEquals( 'x', buf[0]) ; + len = cs.read(buf, 0, 10); + assertEquals( -1, len ); + + // rewind + cs.reset(); + len = cs.read(buf, 0, 10); + assertEquals( 1, len ); + assertEquals( 'x', buf[0]) ; + } + public void testNothingChange() throws Exception { CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) ); TokenStream ts = new CharStreamAwareWhitespaceTokenizer( cs ); diff --git a/src/webapp/web/admin/analysis.jsp b/src/webapp/web/admin/analysis.jsp index 3a5f01b2f9b..441445ad6dc 100644 --- a/src/webapp/web/admin/analysis.jsp +++ b/src/webapp/web/admin/analysis.jsp @@ -19,6 +19,9 @@ org.apache.lucene.analysis.Token, org.apache.lucene.analysis.TokenStream, org.apache.lucene.index.Payload, + org.apache.solr.analysis.CharReader, + org.apache.solr.analysis.CharStream, + org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenizerChain, org.apache.solr.analysis.TokenizerFactory, @@ -171,19 +174,32 @@ <%! 
private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { - Reader reader = new StringReader(val); + CharStream reader = CharReader.get(new StringReader(val)); FieldType ft = field.getType(); Analyzer analyzer = queryAnalyser ? ft.getQueryAnalyzer() : ft.getAnalyzer(); if (analyzer instanceof TokenizerChain) { TokenizerChain tchain = (TokenizerChain)analyzer; + CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories(); TokenizerFactory tfac = tchain.getTokenizerFactory(); TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories(); - TokenStream tstream = tfac.create(tchain.charStream(reader)); + if( cfiltfacs != null ){ + for(CharFilterFactory cfiltfac : cfiltfacs ){ + reader = cfiltfac.create(reader); + if(verbose){ + writeHeader(out, cfiltfac.getClass(), cfiltfac.getArgs()); + writeCharStream(out, reader); + } + } + } + + // StringReader should support reset() + reader.reset(); + TokenStream tstream = tfac.create(reader); List tokens = getTokens(tstream); - tstream = tfac.create(tchain.charStream(reader)); + tstream = tfac.create(reader); if (verbose) { writeHeader(out, tfac.getClass(), tfac.getArgs()); } @@ -453,4 +469,30 @@ out.println(""); } + static void writeCharStream(JspWriter out, CharStream input) throws IOException { + out.println(""); + out.println(""); + + out.print(""); + + // StringReader should support reset() + input.reset(); + final int BUFFER_SIZE = 1024; + char[] buf = new char[BUFFER_SIZE]; + int len = 0; + StringBuilder sb = new StringBuilder(); + do { + len = input.read( buf, 0, BUFFER_SIZE ); + if( len > 0 ) sb.append(buf, 0, len); // read() returns -1 at end of stream; appending with len == -1 would throw + } while( len != -1 ); // drain until end of stream: a short read is not EOF, and an exact multiple of BUFFER_SIZE needs one more read + out.print(""); + + out.println(""); + out.println("
"); + XML.escapeCharData("text",out); + out.println(""); + XML.escapeCharData(sb.toString(),out); + out.println("
"); + } + %>