// Listing metadata from the repository viewer: 43 lines, 1.1 KiB, Java; revision timestamp 2016-08-01 10:45:10 +02:00.
package com.baeldung.htmlunit;
import java.util.List;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlHeading1;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
public class HtmlUnitWebScraping {
private WebClient webClient;
2016-08-01 10:45:10 +02:00
@Before
public void init() throws Exception {
webClient = new WebClient();
}
2016-08-01 10:45:10 +02:00
@After
public void close() throws Exception {
webClient.close();
}
2016-08-01 10:45:10 +02:00
@Test
public void givenBaeldungArchive_whenRetrievingArticle_thenHasH1()
throws Exception {
webClient.getOptions().setCssEnabled(false);
webClient.getOptions().setJavaScriptEnabled(false);
2016-08-01 10:45:10 +02:00
String url = "http://www.baeldung.com/full_archive";
HtmlPage page = webClient.getPage(url);
String xpath = "(//ul[@class='car-monthlisting']/li)[1]/a";
HtmlAnchor latestPostLink
= (HtmlAnchor) page.getByXPath(xpath).get(0);
HtmlPage postPage = latestPostLink.click();
2016-08-01 10:45:10 +02:00
List<HtmlHeading1> h1
= (List<HtmlHeading1>) postPage.getByXPath("//h1");
2016-08-01 10:45:10 +02:00
Assert.assertTrue(h1.size() > 0);
}
2016-08-01 10:45:10 +02:00
}