Code for parsing HTML table using Jsoup

As suggested, moved from libraries-4 directory to jsoup directory in the
repo.
This commit is contained in:
Omkar A 2024-03-06 19:14:47 +05:30
parent 5af7b8c289
commit 3160b21ea6
3 changed files with 247 additions and 0 deletions

View File

@ -0,0 +1,122 @@
package com.baeldung.jsoup;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Loads HTML documents with jsoup and reads or modifies the tables they contain.
 *
 * <p>Every table operation addresses a table by its zero-based position among all
 * {@code <table>} elements of the document and works on the first {@code <tbody>}
 * found inside that table.
 */
public class JsoupTableParser {

    /** Selector matching both header cells and data cells of a row. */
    private static final String CELL_SELECTOR = "th,td";

    /** Text written into every cell of a row created by {@link #addRowToTable}. */
    private static final String NEW_CELL_TEXT = "11";

    /**
     * Parses an HTML string.
     *
     * @param html the HTML markup to parse
     * @return the parsed document
     */
    public Document loadFromString(String html) {
        return Jsoup.parse(html);
    }

    /**
     * Fetches a page with an HTTP GET request and parses it.
     *
     * @param url the address to fetch
     * @return the parsed document
     * @throws IOException if the request fails
     */
    public Document loadFromURL(String url) throws IOException {
        return Jsoup.connect(url)
            .get();
    }

    /**
     * Parses a classpath resource as UTF-8 HTML.
     *
     * <p>The resource is read as a stream rather than through {@code java.io.File}, so it
     * also works when the resource path contains characters that are escaped in URLs
     * (such as spaces) or when the resource is packaged inside a JAR.
     *
     * @param filePath resource name, resolved by this class's class loader
     * @return the parsed document, never {@code null}
     * @throws IllegalArgumentException if no such resource exists
     * @throws UncheckedIOException if the resource cannot be read
     */
    public Document loadFromFile(String filePath) {
        try (InputStream input = JsoupTableParser.class.getClassLoader()
            .getResourceAsStream(filePath)) {
            if (input == null) {
                throw new IllegalArgumentException("Resource not found on classpath: " + filePath);
            }
            return Jsoup.parse(input, "UTF-8", "");
        } catch (IOException e) {
            // Fail loudly with the cause attached instead of printing and returning null.
            throw new UncheckedIOException("Failed to read resource: " + filePath, e);
        }
    }

    /**
     * Converts the body rows of a table into maps keyed by header text.
     *
     * <p>Header names are the cell texts of the table's first {@code <tr>}. Each
     * {@code <tr>} of the table body becomes one map in which the n-th cell is stored
     * under the n-th header name; if the header row itself sits inside that body it is
     * returned as a row too. Cells beyond the last header are ignored, and when two
     * headers share the same text the later column wins.
     *
     * @param doc the document containing the table
     * @param tableOrder zero-based index of the table within the document
     * @return one map per body row, in document order; each map iterates in column order
     * @throws IndexOutOfBoundsException if the table, its body, or its first row is missing
     */
    public List<Map<String, String>> parseTable(Document doc, int tableOrder) {
        Element table = findTable(doc, tableOrder);
        Elements dataRows = firstBody(table).select("tr");

        List<String> headers = new ArrayList<>();
        for (Element header : headerCells(table)) {
            headers.add(header.text());
        }

        List<Map<String, String>> parsedDataRows = new ArrayList<>(dataRows.size());
        for (Element row : dataRows) {
            Elements cells = row.select(CELL_SELECTOR);
            // A row may be wider than the header; only map cells that have a header.
            int mappedCols = Math.min(headers.size(), cells.size());
            Map<String, String> dataRow = new LinkedHashMap<>();
            for (int col = 0; col < mappedCols; col++) {
                dataRow.put(headers.get(col), cells.get(col)
                    .text());
            }
            parsedDataRows.add(dataRow);
        }
        return parsedDataRows;
    }

    /**
     * Replaces the text of every cell in the table body with the same value.
     *
     * @param doc the document containing the table
     * @param tableOrder zero-based index of the table within the document
     * @param updateValue text to write into each body cell
     * @throws IndexOutOfBoundsException if the table or its body is missing
     */
    public void updateTableData(Document doc, int tableOrder, String updateValue) {
        Element tbody = firstBody(findTable(doc, tableOrder));
        for (Element row : tbody.select("tr")) {
            for (Element cell : row.select(CELL_SELECTOR)) {
                cell.text(updateValue);
            }
        }
    }

    /**
     * Appends a new row to the end of the table body.
     *
     * <p>The new row has as many {@code <td>} cells as the table's first row has cells,
     * each containing the text {@code "11"}.
     *
     * @param doc the document containing the table
     * @param tableOrder zero-based index of the table within the document
     * @throws IndexOutOfBoundsException if the table, its body, or its first row is missing
     */
    public void addRowToTable(Document doc, int tableOrder) {
        Element table = findTable(doc, tableOrder);
        Element tbody = firstBody(table);
        int numCols = headerCells(table).size();

        Element newDataRow = new Element("tr");
        for (int col = 0; col < numCols; col++) {
            newDataRow.appendElement("td")
                .text(NEW_CELL_TEXT);
        }
        // Attach the node directly; no need to serialise and re-parse the existing rows.
        tbody.appendChild(newDataRow);
    }

    /**
     * Removes one row from the table body.
     *
     * <p>Does nothing when {@code rowNumber} is negative or not smaller than the number
     * of body rows.
     *
     * @param doc the document containing the table
     * @param tableOrder zero-based index of the table within the document
     * @param rowNumber zero-based index of the body row to remove
     * @throws IndexOutOfBoundsException if the table or its body is missing
     */
    public void deleteRowFromTable(Document doc, int tableOrder, int rowNumber) {
        Elements dataRows = firstBody(findTable(doc, tableOrder)).select("tr");
        if (rowNumber >= 0 && rowNumber < dataRows.size()) {
            // Remove the element itself so the document is updated regardless of whether
            // the jsoup version in use propagates Elements.remove(int) to the DOM.
            dataRows.get(rowNumber)
                .remove();
        }
    }

    /** Returns the table at the given zero-based position in the document. */
    private Element findTable(Document doc, int tableOrder) {
        return doc.select("table")
            .get(tableOrder);
    }

    /** Returns the first tbody element found inside the table. */
    private Element firstBody(Element table) {
        return table.select("tbody")
            .get(0);
    }

    /** Returns the cells of the table's first row, which are used as column headers. */
    private Elements headerCells(Element table) {
        return table.select("tr")
            .get(0)
            .select(CELL_SELECTOR);
    }
}

View File

@ -0,0 +1,73 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Student Results</title>
<style>
/* Presentation only: merged cell borders, half-width table, shaded header cells. */
table {
border-collapse: collapse;
width: 50%;
margin: 20px;
}
th, td {
border: 1px solid #dddddd;
text-align: left;
padding: 8px;
}
th {
background-color: #f2f2f2;
}
</style>
</head>
<body>
<h2>Student Results</h2>
<!-- The only table on the page: one header row and five student rows. -->
<table>
<!-- Header row: the four column names. -->
<thead>
<tr>
<th>Name</th>
<th>Maths</th>
<th>English</th>
<th>Science</th>
</tr>
</thead>
<!-- Data rows: one student per row, cells in the same order as the header columns. -->
<tbody>
<tr>
<td>Student 1</td>
<td>90</td>
<td>85</td>
<td>92</td>
</tr>
<tr>
<td>Student 2</td>
<td>88</td>
<td>91</td>
<td>87</td>
</tr>
<tr>
<td>Student 3</td>
<td>78</td>
<td>95</td>
<td>89</td>
</tr>
<tr>
<td>Student 4</td>
<td>94</td>
<td>82</td>
<td>91</td>
</tr>
<tr>
<td>Student 5</td>
<td>85</td>
<td>88</td>
<td>93</td>
</tr>
</tbody>
</table>
</body>
</html>

View File

@ -0,0 +1,52 @@
package com.baeldung.jsoup;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.List;
import java.util.Map;
import org.jsoup.nodes.Document;
import org.junit.Test;
/**
 * Unit tests for {@link JsoupTableParser}, each run against a freshly loaded copy of the
 * {@code Students.html} resource.
 */
public class JsoupTableParserUnitTest {

    /** Parsing the first table exposes each cell under its column header. */
    @Test
    public void whenDocumentTableParsed_thenTableDataReturned() {
        JsoupTableParser parser = new JsoupTableParser();
        Document studentsPage = parser.loadFromFile("Students.html");

        List<Map<String, String>> rows = parser.parseTable(studentsPage, 0);
        Map<String, String> firstRow = rows.get(0);

        assertEquals("90", firstRow.get("Maths"));
    }

    /** After a bulk update, a re-parse returns the new value for a body cell. */
    @Test
    public void whenTableUpdated_thenUpdatedDataReturned() {
        JsoupTableParser parser = new JsoupTableParser();
        Document studentsPage = parser.loadFromFile("Students.html");

        parser.updateTableData(studentsPage, 0, "50");

        List<Map<String, String>> rows = parser.parseTable(studentsPage, 0);
        Map<String, String> thirdRow = rows.get(2);
        assertEquals("50", thirdRow.get("Maths"));
    }

    /** Adding a row makes the parsed table one row longer. */
    @Test
    public void whenTableRowAdded_thenRowCountIncreased() {
        JsoupTableParser parser = new JsoupTableParser();
        Document studentsPage = parser.loadFromFile("Students.html");
        int expectedCount = parser.parseTable(studentsPage, 0)
            .size() + 1;

        parser.addRowToTable(studentsPage, 0);

        int actualCount = parser.parseTable(studentsPage, 0)
            .size();
        assertEquals(expectedCount, actualCount);
    }

    /** Deleting a row makes the parsed table one row shorter. */
    @Test
    public void whenTableRowDeleted_thenRowCountDecreased() {
        JsoupTableParser parser = new JsoupTableParser();
        Document studentsPage = parser.loadFromFile("Students.html");
        int expectedCount = parser.parseTable(studentsPage, 0)
            .size() - 1;

        parser.deleteRowFromTable(studentsPage, 0, 2);

        int actualCount = parser.parseTable(studentsPage, 0)
            .size();
        assertEquals(expectedCount, actualCount);
    }
}