From 720278edd1fb6217f55e3e1b881bfe7dc1469faa Mon Sep 17 00:00:00 2001 From: magese Date: Wed, 26 Dec 2018 14:03:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=85=8D=E7=BD=AE=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=90=8D=E7=A7=B0=E8=AF=BB=E5=8F=96=EF=BC=8C=E6=9B=B4?= =?UTF-8?q?=E6=8D=A2=E4=BD=BF=E7=94=A8=E7=9A=84IO=E6=B5=81=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../analyzer/lucene/IKTokenizerFactory.java | 49 +++++++++++++------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/wltea/analyzer/lucene/IKTokenizerFactory.java b/src/main/java/org/wltea/analyzer/lucene/IKTokenizerFactory.java index 0c441af..cbf17e6 100644 --- a/src/main/java/org/wltea/analyzer/lucene/IKTokenizerFactory.java +++ b/src/main/java/org/wltea/analyzer/lucene/IKTokenizerFactory.java @@ -21,8 +21,8 @@ * 版权声明 2012,乌龙茶工作室 * provided by Linliangyi and copyright 2012 by Oolong studio * - * 7.5版本 由 Magese (magese@live.cn) 更新 - * release 7.5 update by Magese(magese@live.cn) + * 7.6版本 由 Magese (magese@live.cn) 更新 + * release 7.6 update by Magese(magese@live.cn) * */ package org.wltea.analyzer.lucene; @@ -36,6 +36,11 @@ import org.wltea.analyzer.dic.Dictionary; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.*; /** @@ -50,7 +55,9 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad public IKTokenizerFactory(Map args) { super(args); String useSmartArg = args.get("useSmart"); + String confArg = args.get("conf"); this.setUseSmart(Boolean.parseBoolean(useSmartArg)); + this.setConf(confArg); } @Override @@ -67,10 +74,10 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad */ @Override public void inform(ResourceLoader resourceLoader) throws IOException { - System.out.println(String.format("IKTokenizerFactory "+ this.hashCode() +" inform conf: %s", this.conf)); + System.out.println(String.format("IKTokenizerFactory " + this.hashCode() + " inform conf: %s", getConf())); this.loader = resourceLoader; update(); - if ((this.conf != null) && (!this.conf.trim().isEmpty())) { + if ((getConf() != null) && (!getConf().trim().isEmpty())) { UpdateThread.getInstance().register(this); } } @@ -82,24 +89,27 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad */ @Override public void update() throws IOException { + // 默认UTF-8解码 + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + // 获取ik.conf配置文件信息 Properties p = canUpdate(); if (p != null) { // 获取词典表名称集合 List dicPaths = SplitFileNames(p.getProperty("files")); // 获取词典文件的IO流 - List inputStreamList = new ArrayList<>(); + List inputStreamReaderList = new ArrayList<>(); for (String path : dicPaths) { if ((path != null) && (!path.isEmpty())) { - InputStream is = this.loader.openResource(path); - if (is != null) { - inputStreamList.add(is); - } + Reader isr = new InputStreamReader(loader.openResource(path), decoder); + inputStreamReaderList.add(isr); } } // 如果IO流集合不为空则执行加载词典 - if (!inputStreamList.isEmpty()) - Dictionary.reloadDic(inputStreamList); + if (!inputStreamReaderList.isEmpty()) + Dictionary.reloadDic(inputStreamReaderList); } } @@ -108,10 +118,10 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad */ private Properties canUpdate() { try { - if (this.conf == null) + if (getConf() == null) return null; Properties p = new Properties(); - InputStream confStream = this.loader.openResource(this.conf); // 获取配置文件流 + InputStream confStream = this.loader.openResource(getConf()); // 获取配置文件流 p.load(confStream); // 读取配置文件 confStream.close(); // 关闭文件流 String lastupdate = p.getProperty("lastupdate", "0"); // 获取最后更新数字 @@ -122,13 +132,13 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad String paths = p.getProperty("files"); // 获取词典文件名 if ((paths == null) || (paths.trim().isEmpty())) return null; - System.out.println("loading ik.conf files success."); + System.out.println("loading " + getConf() + " files success."); return p; } this.lastUpdateTime = t; return null; } catch (Exception e) { - System.err.println("parsing ik.conf NullPointerException!!!" + Arrays.toString(e.getStackTrace())); + System.err.println("parsing " + getConf() + " NullPointerException!!!" + Arrays.toString(e.getStackTrace())); } return null; } @@ -148,6 +158,7 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad return result; } + /* getter & setter */ private boolean useSmart() { return useSmart; } @@ -155,4 +166,12 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad private void setUseSmart(boolean useSmart) { this.useSmart = useSmart; } + + private String getConf() { + return conf; + } + + private void setConf(String conf) { + this.conf = conf; + } } \ No newline at end of file