diff --git a/spring-mvc-java/src/test/java/com/baeldung/htmlunit/HtmlUnitWebScraping.java b/spring-mvc-java/src/test/java/com/baeldung/htmlunit/HtmlUnitWebScraping.java new file mode 100644 index 0000000000..16d18717e6 --- /dev/null +++ b/spring-mvc-java/src/test/java/com/baeldung/htmlunit/HtmlUnitWebScraping.java @@ -0,0 +1,40 @@ +package com.baeldung.htmlunit; + +import java.util.List; + +import com.gargoylesoftware.htmlunit.WebClient; +import com.gargoylesoftware.htmlunit.html.HtmlAnchor; +import com.gargoylesoftware.htmlunit.html.HtmlHeading1; +import com.gargoylesoftware.htmlunit.html.HtmlHeading2; +import com.gargoylesoftware.htmlunit.html.HtmlPage; + +public class HtmlUnitWebScraping { + + public static void main(final String[] args) throws Exception { + try (final WebClient webClient = new WebClient()) { + + webClient.getOptions().setCssEnabled(false); + webClient.getOptions().setJavaScriptEnabled(false); + + final HtmlPage page = webClient.getPage("http://www.baeldung.com/full_archive"); + final HtmlAnchor latestPostLink = (HtmlAnchor) page.getByXPath("(//ul[@class='car-monthlisting']/li)[1]/a").get(0); + + System.out.println("Entering: " + latestPostLink.getHrefAttribute()); + + final HtmlPage postPage = latestPostLink.click(); + + final HtmlHeading1 heading1 = (HtmlHeading1) postPage.getByXPath("//h1").get(0); + System.out.println("Title: " + heading1.getTextContent()); + + final List headings2 = (List) postPage.getByXPath("//h2"); + + final StringBuilder sb = new StringBuilder(heading1.getTextContent()); + for (final HtmlHeading2 h2 : headings2) { + sb.append("\n").append(h2.getTextContent()); + } + + System.out.println(sb.toString()); + } + } + +}