package com.baeldung.crawler4j; import java.io.File; import edu.uci.ics.crawler4j.crawler.CrawlConfig; import edu.uci.ics.crawler4j.crawler.CrawlController; import edu.uci.ics.crawler4j.fetcher.PageFetcher; import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig; import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer; public class ImageCrawlerController { public static void main(String[] args) throws Exception { File crawlStorage = new File("src/test/resources/crawler4j"); CrawlConfig config = new CrawlConfig(); config.setCrawlStorageFolder(crawlStorage.getAbsolutePath()); config.setIncludeBinaryContentInCrawling(true); config.setMaxPagesToFetch(500); File saveDir = new File("src/test/resources/crawler4j"); int numCrawlers = 12; PageFetcher pageFetcher = new PageFetcher(config); RobotstxtConfig robotstxtConfig = new RobotstxtConfig(); RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher); CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer); controller.addSeed("https://www.baeldung.com/"); CrawlController.WebCrawlerFactory factory = () -> new ImageCrawler(saveDir); controller.start(factory, numCrawlers); } }