BAEL-5755: Convert an XML file to a CSV file

This commit is contained in:
Vladyslav Chernov 2023-09-17 21:57:35 -07:00
parent fa06d77df8
commit 0bb7a8db27
4 changed files with 315 additions and 0 deletions

View File

@ -0,0 +1,113 @@
package com.baeldung.xml.xml2csv;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
/**
 * Converts a bookstore XML document to CSV in two ways: with an XSLT stylesheet
 * and with a hand-written StAX (streaming) reader.
 *
 * <p>Both conversions emit one header line followed by one line per {@code Book}
 * element, with the columns
 * {@code bookstore_id,book_id,category,title,author_id,author_name,price}.
 */
public class Xml2CsvExample {

    private static final String BASE_PATH = "xml/src/main/resources/xml2csv/";
    private static final String STYLE_XSL = BASE_PATH + "style.xsl";
    private static final String DATA_XML = BASE_PATH + "data.xml";
    // The output file name is kept as-is so existing references to it stay valid.
    private static final String OUTPUT_CSV_XSLT = BASE_PATH + "output_xstl.csv";
    private static final String OUTPUT_CSV_STAX = BASE_PATH + "output_stax.csv";

    private static final String CSV_HEADER = "bookstore_id,book_id,category,title,author_id,author_name,price\n";

    /**
     * Runs both conversions against the bundled sample files and prints the
     * stack trace of any conversion failure.
     */
    public static void main(String[] args) {
        try {
            convertXml2CsvXslt(STYLE_XSL, DATA_XML, OUTPUT_CSV_XSLT);
            convertXml2CsvStax(DATA_XML, OUTPUT_CSV_STAX);
        } catch (IOException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Transforms an XML file into a CSV file by applying an XSLT stylesheet.
     *
     * @param xslPath path of the stylesheet that produces the CSV text
     * @param xmlPath path of the XML document to convert
     * @param csvPath path of the CSV file to write
     * @throws TransformerException if the stylesheet cannot be loaded or the transformation fails
     * @throws IOException declared for callers; not thrown directly by this method
     */
    protected static void convertXml2CsvXslt(String xslPath, String xmlPath, String csvPath) throws IOException, TransformerException {
        StreamSource styleSource = new StreamSource(new File(xslPath));
        Transformer transformer = TransformerFactory.newInstance()
            .newTransformer(styleSource);
        Source source = new StreamSource(new File(xmlPath));
        Result outputTarget = new StreamResult(new File(csvPath));
        transformer.transform(source, outputTarget);
    }

    /**
     * Streams an XML file with StAX and writes one CSV row per {@code Book} element.
     *
     * <p>The output is UTF-8 encoded. Missing attributes and missing or empty
     * {@code Title}/{@code Author}/{@code Price} elements become empty fields, so every
     * row always has seven columns. Values containing a comma, double quote or line
     * break are quoted.
     *
     * @param xmlFilePath path of the XML document to convert
     * @param csvFilePath path of the CSV file to write
     * @throws IOException if a file cannot be read or written, or if the XML is not
     *         well-formed (the underlying {@link XMLStreamException} is the cause)
     * @throws TransformerException never thrown; retained so the signature stays unchanged
     */
    protected static void convertXml2CsvStax(String xmlFilePath, String csvFilePath) throws IOException, TransformerException {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        // The input needs no DTD; refusing DTDs and external entities blocks XXE-style attacks.
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        // Files.newBufferedWriter(Path) always encodes as UTF-8, independent of the platform default.
        try (InputStream in = Files.newInputStream(Paths.get(xmlFilePath));
             BufferedWriter writer = Files.newBufferedWriter(Paths.get(csvFilePath))) {
            writer.write(CSV_HEADER);
            XMLStreamReader reader = inputFactory.createXMLStreamReader(in);
            try {
                writeBookRows(reader, writer);
            } finally {
                reader.close();
            }
        } catch (XMLStreamException e) {
            // Report parse failures to the caller instead of silently leaving a truncated file behind.
            throw new IOException("Failed to parse XML file " + xmlFilePath, e);
        }
    }

    /** Walks the XML event stream and writes a CSV row each time a Book element ends. */
    private static void writeBookRows(XMLStreamReader reader, BufferedWriter writer) throws XMLStreamException, IOException {
        String bookstoreId = "";
        String bookId = "";
        String category = "";
        String title = "";
        String authorId = "";
        String authorName = "";
        String price = "";
        // Text is buffered per field because a parser may deliver one text node as several events.
        StringBuilder text = new StringBuilder();
        boolean capturingText = false;

        while (reader.hasNext()) {
            int eventType = reader.next();
            switch (eventType) {
                case XMLStreamConstants.START_ELEMENT: {
                    String name = reader.getLocalName();
                    if ("Bookstore".equals(name)) {
                        bookstoreId = attributeOrEmpty(reader, "id");
                    } else if ("Book".equals(name)) {
                        bookId = attributeOrEmpty(reader, "id");
                        category = attributeOrEmpty(reader, "category");
                        // Start from a clean slate so values never leak from the previous book.
                        title = "";
                        authorId = "";
                        authorName = "";
                        price = "";
                    } else if (isTextField(name)) {
                        if ("Author".equals(name)) {
                            authorId = attributeOrEmpty(reader, "id");
                        }
                        text.setLength(0);
                        capturingText = true;
                    }
                    break;
                }
                case XMLStreamConstants.CHARACTERS:
                case XMLStreamConstants.CDATA:
                    if (capturingText) {
                        text.append(reader.getText());
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT: {
                    String name = reader.getLocalName();
                    if ("Title".equals(name)) {
                        title = text.toString().trim();
                        capturingText = false;
                    } else if ("Author".equals(name)) {
                        authorName = text.toString().trim();
                        capturingText = false;
                    } else if ("Price".equals(name)) {
                        price = text.toString().trim();
                        capturingText = false;
                    } else if ("Book".equals(name)) {
                        writer.write(String.join(",",
                            escapeCsv(bookstoreId), escapeCsv(bookId), escapeCsv(category), escapeCsv(title),
                            escapeCsv(authorId), escapeCsv(authorName), escapeCsv(price)));
                        writer.write("\n");
                    }
                    break;
                }
                default:
                    break;
            }
        }
    }

    /** Tells whether the element's text content maps to a CSV column. */
    private static boolean isTextField(String elementName) {
        return "Title".equals(elementName) || "Author".equals(elementName) || "Price".equals(elementName);
    }

    /** Returns the un-namespaced attribute value, or an empty string when the attribute is absent. */
    private static String attributeOrEmpty(XMLStreamReader reader, String attributeName) {
        String value = reader.getAttributeValue(null, attributeName);
        return value == null ? "" : value;
    }

    /** Quotes a field when it contains a comma, double quote or line break; inner quotes are doubled. */
    private static String escapeCsv(String value) {
        boolean needsQuoting = value.indexOf(',') >= 0 || value.indexOf('"') >= 0
            || value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }
}

View File

@ -0,0 +1,26 @@
<?xml version="1.0"?>
<!--
  Sample input for the XML-to-CSV examples.
  Each Bookstore has an id attribute and a Books list; each Book carries id and
  category attributes plus Title, Author (with its own id attribute) and Price children.
-->
<Bookstores>
<Bookstore id="S001">
<Books>
<Book id="B001" category="Fiction">
<Title>Death and the Penguin</Title>
<Author id="A001">Andrey Kurkov</Author>
<Price>10.99</Price>
</Book>
<Book id="B002" category="Poetry">
<Title>Kobzar</Title>
<Author id="A002">Taras Shevchenko</Author>
<Price>8.50</Price>
</Book>
</Books>
</Bookstore>
<Bookstore id="S002">
<Books>
<Book id="B003" category="Novel">
<Title>Voroshilovgrad</Title>
<Author id="A003">Serhiy Zhadan</Author>
<Price>12.99</Price>
</Book>
</Books>
</Bookstore>
</Bookstores>

View File

@ -0,0 +1,21 @@
<?xml version="1.0"?>
<!--
  Flattens a Bookstores document into CSV text: a header line, then one
  comma-separated line for every Book found under a Bookstore's Books element.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="text" omit-xml-declaration="yes" indent="no"/>

    <xsl:template match="/">
        <!-- Header row -->
        <xsl:text>bookstore_id,book_id,category,title,author_id,author_name,price&#xA;</xsl:text>

        <xsl:for-each select="//Bookstore">
            <xsl:for-each select="Books/Book">
                <!-- ../../@id is the id of the Bookstore this Book was selected from -->
                <xsl:value-of select="../../@id"/>
                <xsl:text>,</xsl:text>
                <xsl:value-of select="@id"/>
                <xsl:text>,</xsl:text>
                <xsl:value-of select="@category"/>
                <xsl:text>,</xsl:text>
                <xsl:value-of select="Title"/>
                <xsl:text>,</xsl:text>
                <xsl:value-of select="Author/@id"/>
                <xsl:text>,</xsl:text>
                <xsl:value-of select="Author"/>
                <xsl:text>,</xsl:text>
                <xsl:value-of select="Price"/>
                <xsl:text>&#xA;</xsl:text>
            </xsl:for-each>
        </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,155 @@
package com.baeldung.xml.xml2csv;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import javax.xml.transform.TransformerException;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
/**
 * Tests for {@link Xml2CsvExample}: both conversions must create a non-empty CSV file
 * whose header and data lines match the expected layout, a missing stylesheet must be
 * reported, and the XSLT conversion must work when run from several threads at once.
 */
public class Xml2CsvExampleUnitTest {

    private static final String BASE_PATH = "src/main/resources/xml2csv/";
    private static final String STYLE_XSL = BASE_PATH + "style.xsl";
    private static final String DATA_XML = BASE_PATH + "data.xml";
    // The temporary output is a CSV file, so it gets a .csv extension.
    private static final String TEMP_OUTPUT_CSV = BASE_PATH + "tempOutput.csv";

    private static final String HEADER_PATTERN = "^bookstore_id,book_id,category,title,author_id,author_name,price$";
    private static final String DATA_PATTERN = "^[A-Z0-9]+,[A-Z0-9]+,[a-zA-Z]+,[a-zA-Z0-9\\s]+,[A-Z0-9]+,[a-zA-Z\\s]+,\\d+(\\.\\d{2})?$";

    /** Removes the temporary output so tests do not see each other's files. */
    @AfterEach
    public void teardown() {
        new File(TEMP_OUTPUT_CSV).delete();
    }

    @Test
    public void whenConvertXml2CsvXslt_thenCsvFileIsCreated() throws IOException, TransformerException {
        Xml2CsvExample.convertXml2CsvXslt(STYLE_XSL, DATA_XML, TEMP_OUTPUT_CSV);

        File csvFile = new File(TEMP_OUTPUT_CSV);
        assertTrue(csvFile.exists());
    }

    @Test
    public void whenConvertXml2CsvStax_thenCsvFileIsCreated() throws IOException, TransformerException {
        Xml2CsvExample.convertXml2CsvStax(DATA_XML, TEMP_OUTPUT_CSV);

        File csvFile = new File(TEMP_OUTPUT_CSV);
        assertTrue(csvFile.exists());
    }

    @Test
    public void whenConvertXml2CsvXslt_thenCsvFileIsNotEmpty() throws IOException, TransformerException {
        Xml2CsvExample.convertXml2CsvXslt(STYLE_XSL, DATA_XML, TEMP_OUTPUT_CSV);

        assertFirstLineIsNotEmpty();
    }

    @Test
    public void whenConvertXml2CsvStax_thenCsvFileIsNotEmpty() throws IOException, TransformerException {
        Xml2CsvExample.convertXml2CsvStax(DATA_XML, TEMP_OUTPUT_CSV);

        assertFirstLineIsNotEmpty();
    }

    @Test
    public void whenConvertXml2CsvXsltWithWrongXSL_thenThrowsException() {
        String xslWrongPath = BASE_PATH + "wrongFile.xsl";

        assertThrows(TransformerException.class, () -> Xml2CsvExample.convertXml2CsvXslt(xslWrongPath, DATA_XML, TEMP_OUTPUT_CSV));
    }

    @Test
    public void whenConvertXml2CsvXslt_thenCsvMatchesPattern() throws IOException, TransformerException {
        Xml2CsvExample.convertXml2CsvXslt(STYLE_XSL, DATA_XML, TEMP_OUTPUT_CSV);

        assertCsvMatchesPattern();
    }

    @Test
    public void whenConvertXml2Stax_thenCsvMatchesPattern() throws IOException, TransformerException {
        Xml2CsvExample.convertXml2CsvStax(DATA_XML, TEMP_OUTPUT_CSV);

        assertCsvMatchesPattern();
    }

    @Test
    public void whenConcurrentConversion_thenNoErrors() throws InterruptedException {
        int numThreads = 10;
        ExecutorService service = Executors.newFixedThreadPool(numThreads);
        CountDownLatch latch = new CountDownLatch(numThreads);
        // Assertion errors raised on worker threads never reach JUnit, so each worker records
        // its failure in its own slot. Slots are read only after latch.await() returns, which
        // makes the workers' writes visible to this thread.
        Throwable[] failures = new Throwable[numThreads];

        try {
            for (int i = 0; i < numThreads; i++) {
                final int threadId = i;
                service.execute(() -> {
                    String threadSpecificOutputCsv = BASE_PATH + "tempOutput" + threadId + ".csv";
                    try {
                        Xml2CsvExample.convertXml2CsvXslt(STYLE_XSL, DATA_XML, threadSpecificOutputCsv);
                        assertTrue(Files.exists(Paths.get(threadSpecificOutputCsv)), "File should exist");
                    } catch (Throwable t) {
                        failures[threadId] = t;
                    } finally {
                        new File(threadSpecificOutputCsv).delete();
                        latch.countDown();
                    }
                });
            }
            latch.await();
        } finally {
            // Release the pool's threads even if the test is interrupted.
            service.shutdown();
        }

        for (int i = 0; i < numThreads; i++) {
            if (failures[i] != null) {
                fail("Conversion failed in thread " + i + ": " + failures[i], failures[i]);
            }
        }
    }

    /** Asserts that the temporary CSV file has a non-empty first line. */
    private static void assertFirstLineIsNotEmpty() throws IOException {
        // try-with-resources closes the reader even when an assertion fails.
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(TEMP_OUTPUT_CSV))) {
            String firstLine = reader.readLine();
            assertNotNull(firstLine);
            assertFalse(firstLine.isEmpty());
        }
    }

    /** Asserts that the temporary CSV file has the expected header followed by at least one well-formed data line. */
    private static void assertCsvMatchesPattern() throws IOException {
        int dataLines = 0;
        boolean headerSeen = false;
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(TEMP_OUTPUT_CSV))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!headerSeen) {
                    assertTrue(line.matches(HEADER_PATTERN), "Header does not match pattern");
                    headerSeen = true;
                } else {
                    assertTrue(line.matches(DATA_PATTERN), "Data line does not match pattern");
                    dataLines++;
                }
            }
        }
        // Without these checks an empty or header-only file would pass vacuously.
        assertTrue(headerSeen, "CSV file has no header line");
        assertTrue(dataLines > 0, "CSV file has no data lines");
    }
}