XLSX2CSV: Do not double-quote values that are already enclosed in quotes, and escape embedded double-quotes

Most CSV formats use "" (two quotes) to escape a "-character; we should do the same in this
example so that it produces files that other CSV processors can parse correctly.

Also, cases where the value is already enclosed in quotes should not lead to additional quotes.

Add a simple initial test to module "examples" to verify basic functionality of XLSX2CSV,
as I often rely on it for converting some very large xlsx-files to csv.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1888418 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2021-04-06 06:11:01 +00:00
parent ddc13a4c48
commit 3aa712c32b
3 changed files with 30 additions and 5 deletions

View File

@ -20,4 +20,7 @@ dependencies {
implementation project(':scratchpad')
implementation "org.apache.logging.log4j:log4j-core:${log4jVersion}"
testImplementation project(path: ':ooxml', configuration: 'tests')
testImplementation project(path: ':main', configuration: 'tests')
}

View File

@ -128,6 +128,12 @@ public class XLSX2CSV {
// Append one ',' for each of the missedCols columns before writing this cell.
for (int i=0; i<missedCols; i++) {
output.append(',');
}
// no need to append anything if we do not have a value
if (formattedValue == null) {
// NOTE(review): this returns before currentCol is updated below. If missedCols is
// derived from currentCol (its computation is not visible here), the next cell may
// count the same columns again and emit extra separators - confirm.
return;
}
currentCol = thisCol;
// Number or string?
@ -136,8 +142,14 @@ public class XLSX2CSV {
Double.parseDouble(formattedValue);
output.append(formattedValue);
} catch (Exception e) {
// Non-numeric value: write it as a quoted CSV field.
// If the value already carries one pair of enclosing quotes, strip that pair first so
// the field is not quoted twice. The length check matters: a value consisting of a
// single '"' both starts and ends with a quote, and substring(1, 0) would throw
// StringIndexOutOfBoundsException.
if (formattedValue.length() >= 2
        && formattedValue.startsWith("\"") && formattedValue.endsWith("\"")) {
    formattedValue = formattedValue.substring(1, formattedValue.length() - 1);
}
output.append('"');
// encode double-quote with two double-quotes to produce a valid CSV format;
// the value is appended exactly once, in its escaped form
output.append(formattedValue.replace("\"", "\"\""));
output.append('"');
}
}

View File

@ -50,7 +50,17 @@ jar {
}
}
// Configuration of this project's test task.
test {
// for some reason catching the OOM does not work when run from Gradle,
// so every class file matching the pattern below is excluded from the test run
exclude '**/MemoryUsage.class'
}
// Create a separate jar for test-code to depend on it in other projects
// See http://stackoverflow.com/questions/5144325/gradle-test-dependency
//
// The jar's base name is "test-" followed by this project's archivesBaseName, its
// content is the output of the test source set, and the task depends on testClasses.
task testJar(type: Jar, dependsOn: testClasses) {
baseName = "test-${project.archivesBaseName}"
from sourceSets.test.output
}
// Declare a configuration named "tests"; a consuming project can select it with
// project(path: ..., configuration: 'tests').
configurations {
tests
}
// Attach the archive produced by the testJar task to the "tests" configuration.
artifacts {
tests testJar
}