<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%@ page import="org.apache.lucene.analysis.Analyzer, org.apache.lucene.analysis.Token, org.apache.lucene.analysis.TokenStream, org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenizerChain, org.apache.solr.analysis.TokenizerFactory, org.apache.solr.schema.FieldType, org.apache.solr.schema.SchemaField, org.apache.solr.util.XML, javax.servlet.jsp.JspWriter,java.io.IOException "%>
<%@ page import="java.io.Reader"%>
<%@ page import="java.io.StringReader"%>
<%@ page import="java.util.*"%>
<%@include file="header.jsp" %>
<%
  // Text inputs. A missing or empty parameter is replaced by the "" literal
  // itself (not merely some empty string), so that any identity comparison
  // against "" elsewhere on this page keeps behaving the same way.
  String name = request.getParameter("name");
  if (name == null || "".equals(name)) {
    name = "";
  }

  String val = request.getParameter("val");
  if (val == null || "".equals(val)) {
    val = "";
  }

  String qval = request.getParameter("qval");
  if (qval == null || "".equals(qval)) {
    qval = "";
  }

  // Boolean flags: true only when the parameter is present and equals "on",
  // ignoring case. Calling equalsIgnoreCase on the literal makes a missing
  // (null) parameter evaluate to false without a separate null test.
  String verboseS = request.getParameter("verbose");
  boolean verbose = "on".equalsIgnoreCase(verboseS);

  String qverboseS = request.getParameter("qverbose");
  boolean qverbose = "on".equalsIgnoreCase(qverboseS);

  String highlightS = request.getParameter("highlight");
  boolean highlight = "on".equalsIgnoreCase(highlightS);
%>

Field Analysis

Field name
Field value (Index)
verbose output >
highlight matches >
Field value (Query)
verbose output >
<%
  // NOTE(review): the HTML tags inside the string literals of this page were
  // missing from the reviewed source (the literals were empty or held only
  // the visible text). The tags used below are a reconstruction; confirm the
  // element names and CSS classes against the page stylesheet.

  // Look up the requested field. An unknown name is reported on the page and
  // leaves 'field' null, which skips all analysis below.
  SchemaField field = null;

  if (name.length() != 0) {
    try {
      field = schema.getField(name);
    } catch (Exception e) {
      // 'name' comes straight from the request, so it must be escaped before
      // being written back into the page.
      out.print("<strong>Unknown Field ");
      XML.escapeCharData(name, out);
      out.println("</strong>");
    }
  }

  if (field != null) {
    // Terms produced by the query analyzer; used to highlight matching terms
    // in the index analyzer output. Stays null when highlighting is off.
    HashSet<Tok> matches = null;
    if (qval.length() != 0 && highlight) {
      Reader reader = new StringReader(qval);
      Analyzer analyzer = field.getType().getQueryAnalyzer();
      TokenStream tstream = analyzer.tokenStream(field.getName(), reader);
      List<Token> tokens = getTokens(tstream);
      matches = new HashSet<Tok>();
      for (Token t : tokens) {
        // Tok equality only looks at the term text, so the position is irrelevant here.
        matches.add(new Tok(t, 0));
      }
    }

    if (val.length() != 0) {
      out.println("<h3>Index Analyzer</h3>");
      doAnalyzer(out, field, val, false, verbose, matches);
    }
    if (qval.length() != 0) {
      out.println("<h3>Query Analyzer</h3>");
      doAnalyzer(out, field, qval, true, qverbose, null);
    }
  }
%>
<%!
  /**
   * Runs the index or query analyzer of a field over a value and writes the
   * resulting tokens to the page.
   *
   * When the analyzer is a TokenizerChain, the tokenizer and every token
   * filter are run one stage at a time and the tokens are written after each
   * stage. Any other analyzer is run as a whole and its final tokens are
   * written once.
   *
   * @param out           page writer
   * @param field         field whose type supplies the analyzer
   * @param val           text to analyze
   * @param queryAnalyser true to use the query analyzer, false for the index analyzer
   * @param verbose       true to also write a header per stage and the extra token rows
   * @param match         tokens to highlight, or null for no highlighting
   */
  private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
    Reader reader = new StringReader(val);

    FieldType ft = field.getType();
    Analyzer analyzer = queryAnalyser ? ft.getQueryAnalyzer() : ft.getAnalyzer();
    if (analyzer instanceof TokenizerChain) {
      TokenizerChain tchain = (TokenizerChain) analyzer;
      TokenizerFactory tfac = tchain.getTokenizerFactory();
      TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();

      TokenStream tstream = tfac.create(reader);
      List<Token> tokens = getTokens(tstream);
      if (verbose) {
        writeHeader(out, tfac.getClass(), tfac.getArgs());
      }

      writeTokens(out, tokens, ft, verbose, match);

      for (TokenFilterFactory filtfac : filtfacs) {
        if (verbose) {
          writeHeader(out, filtfac.getClass(), filtfac.getArgs());
        }

        // Feed the previous stage's tokens into this filter by replaying the
        // collected list as a token stream.
        final Iterator<Token> iter = tokens.iterator();
        tstream = filtfac.create(new TokenStream() {
          public Token next() throws IOException {
            return iter.hasNext() ? iter.next() : null;
          }
        });
        tokens = getTokens(tstream);

        writeTokens(out, tokens, ft, verbose, match);
      }

    } else {
      TokenStream tstream = analyzer.tokenStream(field.getName(), reader);
      List<Token> tokens = getTokens(tstream);
      if (verbose) {
        writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
      }
      writeTokens(out, tokens, ft, verbose, match);
    }
  }

  /**
   * Drains a token stream into a list.
   *
   * @param tstream stream to read until next() returns null
   * @return the tokens in the order they were produced
   */
  static List<Token> getTokens(TokenStream tstream) throws IOException {
    List<Token> tokens = new ArrayList<Token>();
    for (Token t = tstream.next(); t != null; t = tstream.next()) {
      tokens.add(t);
    }
    return tokens;
  }

  /**
   * A token together with its accumulated position. Equality and hash code
   * use only the term text, so a Tok built from a query token matches a Tok
   * built from an index token with the same text.
   */
  private static class Tok {
    Token token;
    int pos;

    Tok(Token token, int pos) {
      this.token = token;
      this.pos = pos;
    }

    public boolean equals(Object o) {
      // Guard against null and foreign types instead of failing on the cast.
      if (!(o instanceof Tok)) {
        return false;
      }
      return ((Tok) o).token.termText().equals(token.termText());
    }

    public int hashCode() {
      return token.termText().hashCode();
    }

    public String toString() {
      return token.termText();
    }
  }

  /** Converts one table cell value (a Tok in this page) to its display text. */
  private static interface ToStr {
    public String toStr(Object o);
  }

  /**
   * Writes one logical row of the token table. Each element of arrLst is the
   * list of tokens sharing one position and becomes one column.
   *
   * @param out       page writer
   * @param header    row title, written only when verbose; may be null
   * @param arrLst    tokens grouped by position, one list per column
   * @param converter produces the cell text for a token
   * @param multival  true to write one table row per stacked token; false to
   *                  write only the first token of each position
   * @param verbose   true to write the row title cell
   * @param match     tokens whose cells are highlighted, or null
   */
  private static void printRow(JspWriter out, String header, List<Tok>[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException {
    // find the maximum number of terms for any position
    int maxSz = 1;
    if (multival) {
      for (List<Tok> lst : arrLst) {
        maxSz = Math.max(lst.size(), maxSz);
      }
    }

    for (int idx = 0; idx < maxSz; idx++) {
      out.println("<tr>");
      if (idx == 0 && verbose) {
        if (header != null) {
          // The title cell spans every table row of this logical row.
          out.print("<th NOWRAP rowspan=\"" + maxSz + "\">");
          XML.escapeCharData(header, out);
          out.println("</th>");
        }
      }

      for (List<Tok> lst : arrLst) {
        if (lst.size() <= idx) continue;
        if (match != null && match.contains(lst.get(idx))) {
          out.print("<td class=\"highlight\"");
        } else {
          out.print("<td class=\"debugdata\"");
        }

        // A position with a single token fills the full height of the row.
        if (idx == 0 && lst.size() == 1 && maxSz > 1) {
          out.print(" rowspan=\"" + maxSz + '"');
        }

        out.print('>');
        XML.escapeCharData(converter.toStr(lst.get(idx)), out);
        out.print("</td>");
      }

      out.println("</tr>");
    }
  }

  /**
   * Writes a heading naming an analysis stage and its arguments.
   *
   * @param out   page writer
   * @param clazz class of the tokenizer factory, filter factory or analyzer
   * @param args  the stage's arguments, written via their toString() form
   */
  static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
    out.print("<h4>");
    out.print(clazz.getName());
    XML.escapeCharData("   " + args, out);
    out.println("</h4>");
  }

  /**
   * Writes a table describing the given tokens, one column per position.
   * Rows: term position (verbose), term text, raw text (only when some term's
   * readable form differs from its indexed form), term type (verbose) and
   * source start,end offsets (verbose).
   *
   * @param out     page writer
   * @param tokens  tokens to display, in stream order
   * @param ft      field type used to turn indexed terms into readable text
   * @param verbose true to include the position, type and offset rows
   * @param match   tokens to highlight in the text rows, or null
   */
  static void writeTokens(JspWriter out, List<Token> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {

    // Use a map to tell what tokens are in what positions
    // because some tokenizers/filters may do funky stuff with
    // very large increments, or negative increments.
    HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
    boolean needRaw = false;
    int pos = 0;
    for (Token t : tokens) {
      if (!t.termText().equals(ft.indexedToReadable(t.termText()))) {
        needRaw = true;
      }

      pos += t.getPositionIncrement();
      List<Tok> lst = map.get(pos);
      if (lst == null) {
        lst = new ArrayList<Tok>(1);
        map.put(pos, lst);
      }
      lst.add(new Tok(t, pos));
    }

    List<Tok>[] arr = (List<Tok>[]) map.values().toArray(new ArrayList[map.size()]);

    // A raw Comparator is used on purpose: per the original author's note, a
    // Comparator<List<Tok>> worked with Resin but failed on Tomcat 5.5 with
    // java.lang.AbstractMethodError inside Arrays.sort.
    // Positions are compared explicitly rather than subtracted, because very
    // large or negative increments could make a subtraction overflow.
    Arrays.sort(arr, new Comparator() {
      public int compare(Object a, Object b) {
        int posA = ((List<Tok>) a).get(0).pos;
        int posB = ((List<Tok>) b).get(0).pos;
        return posA < posB ? -1 : (posA == posB ? 0 : 1);
      }
    });

    out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");

    if (verbose) {
      printRow(out, "term position", arr, new ToStr() {
        public String toStr(Object o) {
          return Integer.toString(((Tok) o).pos);
        }
      }, false, verbose, null);
    }

    printRow(out, "term text", arr, new ToStr() {
      public String toStr(Object o) {
        return ft.indexedToReadable(((Tok) o).token.termText());
      }
    }, true, verbose, match);

    if (needRaw) {
      printRow(out, "raw text", arr, new ToStr() {
        public String toStr(Object o) {
          // todo: output in hex or something?
          // check if it's all ascii or not?
          return ((Tok) o).token.termText();
        }
      }, true, verbose, match);
    }

    if (verbose) {
      printRow(out, "term type", arr, new ToStr() {
        public String toStr(Object o) {
          return ((Tok) o).token.type();
        }
      }, true, verbose, null);
    }

    if (verbose) {
      printRow(out, "source start,end", arr, new ToStr() {
        public String toStr(Object o) {
          Token t = ((Tok) o).token;
          return Integer.toString(t.startOffset()) + ',' + t.endOffset();
        }
      }, true, verbose, null);
    }

    out.println("</table>");
  }
%>