From 6a60defc94c2e0f0515448b1983119b8da3d980d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Soares?= Date: Mon, 9 Jan 2017 14:06:46 +0000 Subject: [PATCH] Remove Apache Commons IO dependency and split into smaller methods (#942) * Fix the requested changes * Split into smaller methods * Split into smaller methods * Remove apache dependency and split into smaller methods * Add unit tests --- jsoup/pom.xml | 9 +-- .../{JsoupExample.java => JsoupParser.java} | 58 ++++++++++--------- .../com/baeldung/jsoup/JsoupParserTest.java | 36 ++++++++++++ 3 files changed, 71 insertions(+), 32 deletions(-) rename jsoup/src/main/java/com/baeldung/jsoup/{JsoupExample.java => JsoupParser.java} (66%) create mode 100644 jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java diff --git a/jsoup/pom.xml b/jsoup/pom.xml index 989f30422c..343e139b46 100644 --- a/jsoup/pom.xml +++ b/jsoup/pom.xml @@ -15,15 +15,16 @@ ${jsoup.version} - commons-io - commons-io - ${commons.io.version} + junit + junit + 4.12 + test 1.8 1.8 - 2.5 + 1.10.1 diff --git a/jsoup/src/main/java/com/baeldung/jsoup/JsoupExample.java b/jsoup/src/main/java/com/baeldung/jsoup/JsoupParser.java similarity index 66% rename from jsoup/src/main/java/com/baeldung/jsoup/JsoupExample.java rename to jsoup/src/main/java/com/baeldung/jsoup/JsoupParser.java index 10431a621d..cb86b16888 100644 --- a/jsoup/src/main/java/com/baeldung/jsoup/JsoupExample.java +++ b/jsoup/src/main/java/com/baeldung/jsoup/JsoupParser.java @@ -1,47 +1,33 @@ package com.baeldung.jsoup; -import java.io.File; import java.io.IOException; -import org.apache.commons.io.FileUtils; -import org.jsoup.HttpStatusException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.parser.Tag; import org.jsoup.select.Elements; -public class JsoupExample { +public class JsoupParser { - public static void main(String[] args) throws IOException { - scrapeSpringBlog(); + Document doc; + + public void loadDocument(String blogUrl) throws IOException { + doc = Jsoup.connect(blogUrl).get(); } - static void scrapeSpringBlog() throws IOException { - String blogUrl = "https://spring.io/blog"; - Document doc = Jsoup.connect(blogUrl).get(); - - try { - Document doc404 = Jsoup.connect("https://spring.io/will-not-be-found").get(); - } catch (HttpStatusException ex) { - System.out.println(ex.getMessage()); - } - - Document docCustomConn = Jsoup.connect(blogUrl).userAgent("Mozilla").get(); - docCustomConn = Jsoup.connect(blogUrl).timeout(5000).get(); - docCustomConn = Jsoup.connect(blogUrl).cookie("cookiename", "val234").get(); - // docCustomConn = Jsoup.connect(blogUrl).data("datakey", "datavalue").post(); - docCustomConn = Jsoup.connect(blogUrl).header("headersecurity", "xyz123").get(); - - docCustomConn = Jsoup.connect(blogUrl) + void loadDocumentCustomized(String blogUrl) throws IOException { + doc = Jsoup.connect(blogUrl) .userAgent("Mozilla") .timeout(5000) .cookie("cookiename", "val234") .cookie("anothercookie", "ilovejsoup") + .referrer("http://google.com") .header("headersecurity", "xyz123") .get(); + } + void examplesSelectors() { Elements links = doc.select("a"); - Elements sections = doc.select("section"); Elements logo = doc.select(".spring-logo--container"); Elements pagination = doc.select("#pagination_control"); Elements divsDescendant = doc.select("header div"); @@ -50,6 +36,14 @@ public class JsoupExample { Element pag = doc.getElementById("pagination_control"); Elements desktopOnly = doc.getElementsByClass("desktopOnly"); + Elements sections = doc.select("section"); + Element firstSection = sections.first(); + Elements sectionParagraphs = firstSection.select(".paragraph"); + } + + void examplesTraversing() { + Elements sections = doc.select("section"); + Element firstSection = sections.first(); Element lastSection = sections.last(); Element secondSection = sections.get(2); @@ -59,9 +53,9 @@ public class JsoupExample { Elements siblings = firstSection.siblingElements(); sections.stream().forEach(el -> System.out.println("section: " + el)); + } - Elements sectionParagraphs = firstSection.select(".paragraph"); - + void examplesExtracting() { Element firstArticle = doc.select("article").first(); Element timeElement = firstArticle.select("time").first(); String dateTimeOfFirstArticle = timeElement.attr("datetime"); @@ -69,7 +63,14 @@ public class JsoupExample { String sectionDivText = sectionDiv.text(); String articleHtml = firstArticle.html(); String outerHtml = firstArticle.outerHtml(); + } + void examplesModifying() { + Element firstArticle = doc.select("article").first(); + Element timeElement = firstArticle.select("time").first(); + Element sectionDiv = firstArticle.select("section div").first(); + + String dateTimeOfFirstArticle = timeElement.attr("datetime"); timeElement.attr("datetime", "2016-12-16 15:19:54.3"); sectionDiv.text("foo bar"); firstArticle.select("h2").html("
"); @@ -82,8 +83,9 @@ public class JsoupExample { doc.select("li.navbar-link").remove(); firstArticle.select("img").remove(); + } - File indexFile = new File("/tmp", "spring_blog_home.html"); - FileUtils.writeStringToFile(indexFile, doc.html(), doc.charset()); + String getTidyHtml() { + return doc.html(); } } diff --git a/jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java b/jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java new file mode 100644 index 0000000000..85fd3c3459 --- /dev/null +++ b/jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java @@ -0,0 +1,36 @@ +package com.baeldung.jsoup; + +import java.io.IOException; +import org.jsoup.HttpStatusException; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.Test; + +public class JsoupParserTest { + + JsoupParser jsoupParser; + + @Before + public void setUp() { + jsoupParser = new JsoupParser(); + } + + @Test + public void test404() throws IOException { + try { + jsoupParser.loadDocument("https://spring.io/will-not-be-found"); + } catch (HttpStatusException ex) { + assertEquals(404, ex.getStatusCode()); + } + } + + @Test + public void testChange() throws IOException { + jsoupParser.loadDocument("http://spring.io/blog"); + + jsoupParser.examplesModifying(); + + assertTrue(jsoupParser.getTidyHtml().contains("http://baeldung.com")); + } +}