LUCENE-5107: Properties files written by Lucene are now stored in UTF-8 encoding; Unicode is no longer escaped. Reading legacy properties files with \u escapes is still possible

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1502615 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2013-07-12 17:10:22 +00:00
parent bc645837a2
commit 40968cf653
18 changed files with 81 additions and 54 deletions
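
Sketched below, for orientation, is the read/write pattern this commit applies across the changed files (an illustration, not code from the diff: the class and method names are placeholders, and lucene-core is assumed on the classpath for org.apache.lucene.util.IOUtils.CHARSET_UTF_8, the constant the diff itself uses).

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;   // provides the CHARSET_UTF_8 constant used throughout the diff

public class Utf8PropertiesSketch {      // placeholder class name

  // Store: values are written as raw UTF-8, with no \uXXXX escaping.
  static void write(Properties props, File file) throws IOException {
    Writer out = new OutputStreamWriter(new FileOutputStream(file), IOUtils.CHARSET_UTF_8);
    try {
      props.store(out, null);
    } finally {
      out.close();
    }
  }

  // Load: the Reader overload still decodes legacy \uXXXX escapes, so older files stay readable.
  static Properties read(File file) throws IOException {
    Properties props = new Properties();
    InputStream in = new FileInputStream(file);
    try {
      props.load(new InputStreamReader(in, IOUtils.CHARSET_UTF_8));
    } finally {
      in.close();
    }
    return props;
  }
}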

View File

@ -347,6 +347,10 @@ Changes in runtime behavior
to decide if a CFS must be written, instead IndexWriterConfig now has a
property to enable / disable CFS for newly created segments. (Simon Willnauer)
* LUCENE-5107: Properties files by Lucene are now written in UTF-8 encoding,
Unicode is no longer escaped. Reading of legacy properties files with
\u escapes is still possible. (Uwe Schindler, Robert Muir)
======================= Lucene 4.3.1 =======================
Bug Fixes
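
As a concrete illustration of the last sentence of the LUCENE-5107 entry above (a standalone sketch with a made-up key and value, not code from this commit): \uXXXX escaping is part of the properties file format itself, so the Reader-based load still expands it even though the bytes are now decoded as UTF-8.

import java.io.StringReader;
import java.util.Properties;

public class LegacyEscapeSketch {                  // placeholder class name
  public static void main(String[] args) throws Exception {
    // A line as an older, escaping writer would have produced it:
    String legacy = "greeting=Gr\\u00fc\\u00dfe\n";
    Properties p = new Properties();
    p.load(new StringReader(legacy));              // Reader-based load still expands \u escapes
    System.out.println(p.getProperty("greeting")); // prints: Grüße
  }
}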

View File

@ -20,8 +20,11 @@ package org.apache.lucene.analysis.cn.smart;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;
/**
* Manages analysis data configuration for SmartChineseAnalyzer
* <p>
@ -77,13 +80,13 @@ public class AnalyzerProfile {
Properties prop = new Properties();
try {
FileInputStream input = new FileInputStream(propFile);
prop.load(input);
prop.load(new InputStreamReader(input, IOUtils.CHARSET_UTF_8));
String dir = prop.getProperty("analysis.data.dir", "");
input.close();
return dir;
} catch (IOException e) {
return "";
}
return "";
}
}

View File

@ -18,9 +18,9 @@ package org.apache.lucene.benchmark.byTask.utils;
*/
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@ -80,8 +80,7 @@ public class Config {
}
// read props from string
this.props = new Properties();
// props.load always assumes iso8859-1...
props.load(new ByteArrayInputStream(sb.toString().getBytes("ISO-8859-1")));
props.load(new StringReader(sb.toString()));
// make sure work dir is set properly
if (props.get("work.dir") == null) {

View File

@ -22,9 +22,10 @@ java.util.concurrent.Executors#newScheduledThreadPool(int)
java.util.concurrent.Executors#defaultThreadFactory()
java.util.concurrent.Executors#privilegedThreadFactory()
@defaultMessage Properties files should be read/written with InputStream/OutputStream for maximum compatibility, as it uses the official "properties file format" with unicode escapes and properly defined encoding
java.util.Properties#load(java.io.Reader)
java.util.Properties#store(java.io.Writer,java.lang.String)
@defaultMessage Properties files should be read/written with Reader/Writer, using UTF-8 charset. This allows reading older files with unicode escapes, too.
java.util.Properties#load(java.io.InputStream)
java.util.Properties#save(java.io.OutputStream,java.lang.String)
java.util.Properties#store(java.io.OutputStream,java.lang.String)
java.lang.Character#codePointBefore(char[],int) @ Implicit start offset is error-prone when the char[] is a buffer and the first chars are random chars
java.lang.Character#codePointAt(char[],int) @ Implicit end offset is error-prone when the char[] is a buffer and the last chars are random chars
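
For illustration (assuming, per the commit message, that the Reader/Writer entries above are the ones being removed and the stream-based entries the ones being added), code like the first method below now trips the forbidden-apis check, while the second passes; class and method names are placeholders.

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;

class SignatureCheckSketch {                       // placeholder class name
  // Trips the new ban on Properties#load(java.io.InputStream):
  static Properties nowForbidden(File f) throws IOException {
    Properties p = new Properties();
    InputStream in = new FileInputStream(f);
    try {
      p.load(in);                                  // ISO-8859-1 plus \u escapes, charset not controllable
    } finally {
      in.close();
    }
    return p;
  }

  // Passes: Properties#load(java.io.Reader) with an explicit UTF-8 charset.
  static Properties allowed(File f) throws IOException {
    Properties p = new Properties();
    InputStream in = new FileInputStream(f);
    try {
      p.load(new InputStreamReader(in, IOUtils.CHARSET_UTF_8));
    } finally {
      in.close();
    }
    return p;
  }
}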

View File

@ -376,6 +376,10 @@ Other Changes
* SOLR-4948, SOLR-5009: Tidied up CoreContainer construction logic.
(Alan Woodward, Uwe Schindler, Steve Rowe)
* LUCENE-5107: Properties files by Solr are now written in UTF-8 encoding,
Unicode is no longer escaped. Reading of legacy properties files with
\u escapes is still possible. (Uwe Schindler, Robert Muir)
================== 4.3.1 ==================
Versions of Major Components

View File

@ -22,9 +22,10 @@ import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVE
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
@ -33,6 +34,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -179,7 +181,7 @@ public class SimplePropertiesWriter extends DIHProperties {
@Override
public void persist(Map<String, Object> propObjs) {
OutputStream propOutput = null;
Writer propOutput = null;
Properties existingProps = mapToProperties(readIndexerProperties());
Properties newProps = mapToProperties(propObjs);
try {
@ -189,18 +191,14 @@ public class SimplePropertiesWriter extends DIHProperties {
filePath += File.separator;
}
filePath += filename;
propOutput = new FileOutputStream(filePath);
propOutput = new OutputStreamWriter(new FileOutputStream(filePath), IOUtils.CHARSET_UTF_8);
existingProps.store(propOutput, null);
log.info("Wrote last indexed time to " + filename);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to persist Index Start Time", e);
} finally {
try {
if (propOutput != null) propOutput.close();
} catch (IOException e) {
propOutput = null;
}
IOUtils.closeWhileHandlingException(propOutput);
}
}
@ -215,16 +213,12 @@ public class SimplePropertiesWriter extends DIHProperties {
}
filePath += filename;
propInput = new FileInputStream(filePath);
props.load(propInput);
props.load(new InputStreamReader(propInput, IOUtils.CHARSET_UTF_8));
log.info("Read " + filename);
} catch (Exception e) {
log.warn("Unable to read: " + filename);
} finally {
try {
if (propInput != null) propInput.close();
} catch (IOException e) {
propInput = null;
}
IOUtils.closeWhileHandlingException(propInput);
}
return propertiesToMap(props);
}

View File

@ -16,11 +16,12 @@
*/
package org.apache.solr.handler.dataimport;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Map;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.slf4j.Logger;
@ -63,10 +64,10 @@ public class ZKPropertiesWriter extends SimplePropertiesWriter {
public void persist(Map<String, Object> propObjs) {
Properties existing = mapToProperties(readIndexerProperties());
existing.putAll(mapToProperties(propObjs));
ByteArrayOutputStream output = new ByteArrayOutputStream();
StringWriter output = new StringWriter();
try {
existing.store(output, "");
byte[] bytes = output.toByteArray();
existing.store(output, null);
byte[] bytes = output.toString().getBytes(IOUtils.CHARSET_UTF_8);
if (!zkClient.exists(path, false)) {
try {
zkClient.makePath(path, false);
@ -89,8 +90,7 @@ public class ZKPropertiesWriter extends SimplePropertiesWriter {
try {
byte[] data = zkClient.getData(path, null, null, false);
if (data != null) {
ByteArrayInputStream input = new ByteArrayInputStream(data);
props.load(input);
props.load(new StringReader(new String(data, "UTF-8")));
}
} catch (Throwable e) {
log.warn(
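
The ZooKeeper-backed writer above round-trips properties through an in-memory String instead of a file; a minimal standalone sketch of that round trip, without the zkClient plumbing and with a made-up key and value, could look like this:

import java.io.StringReader;
import java.io.StringWriter;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;

class ZkRoundTripSketch {                                    // placeholder class name
  public static void main(String[] args) throws Exception {
    // Store to UTF-8 bytes, as persist() does before handing them to zkClient:
    Properties props = new Properties();
    props.setProperty("last_index_time", "2013-07-12 17:10:22");  // made-up key/value
    StringWriter sw = new StringWriter();
    props.store(sw, null);
    byte[] data = sw.toString().getBytes(IOUtils.CHARSET_UTF_8);

    // Load back from the raw bytes, as readIndexerProperties() does after zkClient.getData():
    Properties read = new Properties();
    read.load(new StringReader(new String(data, "UTF-8")));
    System.out.println(read.getProperty("last_index_time"));
  }
}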

View File

@ -17,6 +17,7 @@
package org.apache.solr.response;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SolrResponseBase;
@ -150,7 +151,7 @@ public class VelocityResponseWriter implements QueryResponseWriter {
try {
is = resourceLoader.getResourceStream(propFile);
Properties props = new Properties();
props.load(is);
props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));
engine.init(props);
}
finally {

View File

@ -179,7 +179,7 @@ class SolrZkServerProps extends QuorumPeerConfig {
Properties cfg = new Properties();
FileInputStream in = new FileInputStream(configFile);
try {
cfg.load(in);
cfg.load(new InputStreamReader(in, IOUtils.CHARSET_UTF_8));
} finally {
in.close();
}

View File

@ -22,11 +22,13 @@ import com.google.common.collect.ImmutableMap;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.IOUtils;
import org.apache.solr.util.PropertiesUtil;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Locale;
import java.util.Properties;
@ -183,15 +185,18 @@ public class CoreDescriptor {
String filename = coreProperties.getProperty(CORE_PROPERTIES, DEFAULT_EXTERNAL_PROPERTIES_FILE);
File propertiesFile = resolvePaths(filename);
if (propertiesFile.exists()) {
FileInputStream in = null;
try {
in = new FileInputStream(propertiesFile);
Properties externalProps = new Properties();
externalProps.load(new FileInputStream(propertiesFile));
externalProps.load(new InputStreamReader(in, "UTF-8"));
coreProperties.putAll(externalProps);
}
catch (IOException e) {
} catch (IOException e) {
String message = String.format(Locale.ROOT, "Could not load properties from %s: %s:",
propertiesFile.getAbsoluteFile(), e.toString());
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message);
} finally {
IOUtils.closeQuietly(in);
}
}
}

View File

@ -17,6 +17,7 @@ package org.apache.solr.core;
* limitations under the License.
*/
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.IOUtils;
@ -27,7 +28,9 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Date;
import java.util.List;
import java.util.Properties;
@ -73,10 +76,10 @@ public class CorePropertiesLocator implements CoresLocator {
private void writePropertiesFile(CoreDescriptor cd, File propfile) {
Properties p = buildCoreProperties(cd);
OutputStream os = null;
Writer os = null;
try {
os = new FileOutputStream(propfile);
p.store(os, "Written by CorePropertiesLocator on " + new Date());
os = new OutputStreamWriter(new FileOutputStream(propfile), Charsets.UTF_8);
p.store(os, "Written by CorePropertiesLocator");
}
catch (IOException e) {
logger.error("Couldn't persist core properties to {}: {}", propfile.getAbsolutePath(), e);
@ -134,7 +137,7 @@ public class CorePropertiesLocator implements CoresLocator {
File instanceDir = propertiesFile.getParentFile();
Properties coreProperties = new Properties();
fis = new FileInputStream(propertiesFile);
coreProperties.load(fis);
coreProperties.load(new InputStreamReader(fis, Charsets.UTF_8));
String name = createName(coreProperties, instanceDir);
return new CoreDescriptor(cc, name, instanceDir.getAbsolutePath(), coreProperties);
}

View File

@ -96,6 +96,7 @@ import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.net.URL;
@ -252,7 +253,7 @@ public final class SolrCore implements SolrInfoMBean {
final InputStream is = new PropertiesInputStream(input);
try {
p.load(is);
p.load(new InputStreamReader(is, "UTF-8"));
String s = p.getProperty("index");
if (s != null && s.trim().length() > 0) {

View File

@ -20,6 +20,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Writer;
import java.nio.ByteBuffer;
@ -48,6 +49,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import static org.apache.lucene.util.IOUtils.CHARSET_UTF_8;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
@ -813,7 +815,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
try {
final InputStream is = new PropertiesInputStream(input);
Properties props = new Properties();
props.load(is);
props.load(new InputStreamReader(is, CHARSET_UTF_8));
return props;
} finally {
input.close();

View File

@ -40,7 +40,9 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.text.SimpleDateFormat;
@ -73,6 +75,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import static org.apache.lucene.util.IOUtils.CHARSET_UTF_8;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
@ -578,7 +581,7 @@ public class SnapPuller {
}
final IndexOutput out = dir.createOutput(REPLICATION_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
OutputStream outFile = new PropertiesOutputStream(out);
Writer outFile = new OutputStreamWriter(new PropertiesOutputStream(out), CHARSET_UTF_8);
try {
props.store(outFile, "Replication details");
dir.sync(Collections.singleton(REPLICATION_PROPERTIES));
@ -890,7 +893,7 @@ public class SnapPuller {
final InputStream is = new PropertiesInputStream(input);
try {
p.load(is);
p.load(new InputStreamReader(is, CHARSET_UTF_8));
} catch (Exception e) {
LOG.error("Unable to load " + SnapPuller.INDEX_PROPERTIES, e);
} finally {
@ -904,9 +907,9 @@ public class SnapPuller {
}
final IndexOutput out = dir.createOutput(SnapPuller.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
p.put("index", tmpIdxDirName);
OutputStream os = null;
Writer os = null;
try {
os = new PropertiesOutputStream(out);
os = new OutputStreamWriter(new PropertiesOutputStream(out), CHARSET_UTF_8);
p.store(os, SnapPuller.INDEX_PROPERTIES);
dir.sync(Collections.singleton(INDEX_PROPERTIES));
} catch (Exception e) {

View File

@ -124,7 +124,7 @@ public class TestSolrCoreProperties extends LuceneTestCase {
Properties p = new Properties();
p.setProperty("foo.foo1", "f1");
p.setProperty("foo.foo2", "f2");
FileOutputStream fos = new FileOutputStream(confDir + File.separator + "solrcore.properties");
Writer fos = new OutputStreamWriter(new FileOutputStream(confDir + File.separator + "solrcore.properties"), IOUtils.CHARSET_UTF_8);
p.store(fos, null);
IOUtils.close(fos);
}

View File

@ -19,6 +19,8 @@ package org.apache.solr.core;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Properties;
import javax.xml.parsers.ParserConfigurationException;
@ -30,6 +32,7 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
@ -97,15 +100,15 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{
File newDir = new File(h.getCore().getDataDir() + "index_temp");
newDir.mkdirs();
p.put("index", newDir.getName());
FileOutputStream os = null;
Writer os = null;
try {
os = new FileOutputStream(idxprops);
os = new OutputStreamWriter(new FileOutputStream(idxprops), IOUtils.CHARSET_UTF_8);
p.store(os, "index properties");
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to write " + SnapPuller.INDEX_PROPERTIES, e);
} finally {
if (os != null) os.close();
IOUtils.closeWhileHandlingException(os);
}
//add a doc in the new index dir

View File

@ -27,6 +27,8 @@ import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Properties;
public class TestCoreDiscovery extends SolrTestCaseJ4 {
@ -78,7 +80,7 @@ public class TestCoreDiscovery extends SolrTestCaseJ4 {
private void addCoreWithProps(Properties stockProps, File propFile) throws Exception {
if (!propFile.getParentFile().exists()) propFile.getParentFile().mkdirs();
FileOutputStream out = new FileOutputStream(propFile);
Writer out = new OutputStreamWriter(new FileOutputStream(propFile), IOUtils.CHARSET_UTF_8);
try {
stockProps.store(out, null);
} finally {

View File

@ -18,6 +18,7 @@
package org.apache.solr.handler.admin;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CoreAdminParams;
@ -30,6 +31,7 @@ import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Properties;
public class CoreAdminCreateDiscoverTest extends SolrTestCaseJ4 {
@ -112,7 +114,7 @@ public class CoreAdminCreateDiscoverTest extends SolrTestCaseJ4 {
File propFile = new File(solrHomeDirectory, coreSysProps + "/" + CorePropertiesLocator.PROPERTIES_FILENAME);
FileInputStream is = new FileInputStream(propFile);
try {
props.load(is);
props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));
} finally {
org.apache.commons.io.IOUtils.closeQuietly(is);
}
@ -217,7 +219,7 @@ public class CoreAdminCreateDiscoverTest extends SolrTestCaseJ4 {
File propFile = new File(solrHomeDirectory, coreNormal + "/" + CorePropertiesLocator.PROPERTIES_FILENAME);
FileInputStream is = new FileInputStream(propFile);
try {
props.load(is);
props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));
} finally {
org.apache.commons.io.IOUtils.closeQuietly(is);
}