Remove Apache Commons IO dependency and split into smaller methods (#942)
* Fix the requested changes
* Split into smaller methods
* Split into smaller methods
* Remove Apache dependency and split into smaller methods
* Add unit tests
This commit is contained in:
parent
729c8990d0
commit
6a60defc94
|
@ -15,15 +15,16 @@
|
|||
<version>${jsoup.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>${commons.io.version}</version>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.12</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<properties>
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<commons.io.version>2.5</commons.io.version>
|
||||
|
||||
<jsoup.version>1.10.1</jsoup.version>
|
||||
</properties>
|
||||
</project>
|
||||
|
|
|
@ -1,47 +1,33 @@
|
|||
package com.baeldung.jsoup;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.jsoup.HttpStatusException;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.parser.Tag;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class JsoupExample {
|
||||
public class JsoupParser {
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
scrapeSpringBlog();
|
||||
Document doc;
|
||||
|
||||
public void loadDocument(String blogUrl) throws IOException {
|
||||
doc = Jsoup.connect(blogUrl).get();
|
||||
}
|
||||
|
||||
static void scrapeSpringBlog() throws IOException {
|
||||
String blogUrl = "https://spring.io/blog";
|
||||
Document doc = Jsoup.connect(blogUrl).get();
|
||||
|
||||
try {
|
||||
Document doc404 = Jsoup.connect("https://spring.io/will-not-be-found").get();
|
||||
} catch (HttpStatusException ex) {
|
||||
System.out.println(ex.getMessage());
|
||||
}
|
||||
|
||||
Document docCustomConn = Jsoup.connect(blogUrl).userAgent("Mozilla").get();
|
||||
docCustomConn = Jsoup.connect(blogUrl).timeout(5000).get();
|
||||
docCustomConn = Jsoup.connect(blogUrl).cookie("cookiename", "val234").get();
|
||||
// docCustomConn = Jsoup.connect(blogUrl).data("datakey", "datavalue").post();
|
||||
docCustomConn = Jsoup.connect(blogUrl).header("headersecurity", "xyz123").get();
|
||||
|
||||
docCustomConn = Jsoup.connect(blogUrl)
|
||||
void loadDocumentCustomized(String blogUrl) throws IOException {
|
||||
doc = Jsoup.connect(blogUrl)
|
||||
.userAgent("Mozilla")
|
||||
.timeout(5000)
|
||||
.cookie("cookiename", "val234")
|
||||
.cookie("anothercookie", "ilovejsoup")
|
||||
.referrer("http://google.com")
|
||||
.header("headersecurity", "xyz123")
|
||||
.get();
|
||||
}
|
||||
|
||||
void examplesSelectors() {
|
||||
Elements links = doc.select("a");
|
||||
Elements sections = doc.select("section");
|
||||
Elements logo = doc.select(".spring-logo--container");
|
||||
Elements pagination = doc.select("#pagination_control");
|
||||
Elements divsDescendant = doc.select("header div");
|
||||
|
@ -50,6 +36,14 @@ public class JsoupExample {
|
|||
Element pag = doc.getElementById("pagination_control");
|
||||
Elements desktopOnly = doc.getElementsByClass("desktopOnly");
|
||||
|
||||
Elements sections = doc.select("section");
|
||||
Element firstSection = sections.first();
|
||||
Elements sectionParagraphs = firstSection.select(".paragraph");
|
||||
}
|
||||
|
||||
void examplesTraversing() {
|
||||
Elements sections = doc.select("section");
|
||||
|
||||
Element firstSection = sections.first();
|
||||
Element lastSection = sections.last();
|
||||
Element secondSection = sections.get(2);
|
||||
|
@ -59,9 +53,9 @@ public class JsoupExample {
|
|||
Elements siblings = firstSection.siblingElements();
|
||||
|
||||
sections.stream().forEach(el -> System.out.println("section: " + el));
|
||||
}
|
||||
|
||||
Elements sectionParagraphs = firstSection.select(".paragraph");
|
||||
|
||||
void examplesExtracting() {
|
||||
Element firstArticle = doc.select("article").first();
|
||||
Element timeElement = firstArticle.select("time").first();
|
||||
String dateTimeOfFirstArticle = timeElement.attr("datetime");
|
||||
|
@ -69,7 +63,14 @@ public class JsoupExample {
|
|||
String sectionDivText = sectionDiv.text();
|
||||
String articleHtml = firstArticle.html();
|
||||
String outerHtml = firstArticle.outerHtml();
|
||||
}
|
||||
|
||||
void examplesModifying() {
|
||||
Element firstArticle = doc.select("article").first();
|
||||
Element timeElement = firstArticle.select("time").first();
|
||||
Element sectionDiv = firstArticle.select("section div").first();
|
||||
|
||||
String dateTimeOfFirstArticle = timeElement.attr("datetime");
|
||||
timeElement.attr("datetime", "2016-12-16 15:19:54.3");
|
||||
sectionDiv.text("foo bar");
|
||||
firstArticle.select("h2").html("<div><span></span></div>");
|
||||
|
@ -82,8 +83,9 @@ public class JsoupExample {
|
|||
|
||||
doc.select("li.navbar-link").remove();
|
||||
firstArticle.select("img").remove();
|
||||
}
|
||||
|
||||
File indexFile = new File("/tmp", "spring_blog_home.html");
|
||||
FileUtils.writeStringToFile(indexFile, doc.html(), doc.charset());
|
||||
String getTidyHtml() {
|
||||
return doc.html();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package com.baeldung.jsoup;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.jsoup.HttpStatusException;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class JsoupParserTest {
|
||||
|
||||
JsoupParser jsoupParser;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
jsoupParser = new JsoupParser();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test404() throws IOException {
|
||||
try {
|
||||
jsoupParser.loadDocument("https://spring.io/will-not-be-found");
|
||||
} catch (HttpStatusException ex) {
|
||||
assertEquals(404, ex.getStatusCode());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChange() throws IOException {
|
||||
jsoupParser.loadDocument("http://spring.io/blog");
|
||||
|
||||
jsoupParser.examplesModifying();
|
||||
|
||||
assertTrue(jsoupParser.getTidyHtml().contains("http://baeldung.com"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue