爬虫设置间隔时间

This commit is contained in:
yang.xie 2022-10-13 16:39:08 +08:00
parent 6d0b9ade62
commit d1d01c99f2
2 changed files with 22 additions and 2 deletions

View File

@ -1,5 +1,6 @@
package com.northtecom.visatrack.api.config;
import com.configcat.ConfigCatClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@ -24,6 +25,11 @@ public class ApplicationBeanConfig {
// Populated by Spring through field injection (@Autowired).
@Autowired
private AuthenticationConfiguration authenticationConfiguration;
/**
 * Creates the ConfigCat client bean.
 *
 * <p>The SDK key is taken from the {@code CONFIGCAT_SDK_KEY} environment variable when that
 * variable is set and non-blank. Otherwise the key that was previously hard-coded here is used,
 * so environments that do not define the variable keep working unchanged.
 *
 * <p>NOTE(review): the fallback literal is a credential committed to source control. Rotate it
 * and remove the fallback once every environment supplies {@code CONFIGCAT_SDK_KEY}.
 *
 * @return a ConfigCat client constructed with the resolved SDK key
 * @throws Exception kept from the original signature for compatibility
 */
@Bean
public ConfigCatClient configCatClient() throws Exception {
    // Prefer externally supplied configuration over the key embedded in the source.
    String sdkKey = System.getenv("CONFIGCAT_SDK_KEY");
    if (sdkKey == null || sdkKey.trim().isEmpty()) {
        sdkKey = "d5naCOKEsUeKSEB2aamvxg/JRdvJ42xcUKZGqnHq1vQgQ";
    }
    return new ConfigCatClient(sdkKey);
}
/**
* 用于配置 AuthenticationManager 实例
*/

View File

@ -1,6 +1,7 @@
package com.northtecom.visatrack.api.service.impl;
import cn.hutool.core.date.DateUtil;
import com.configcat.ConfigCatClient;
import com.northtecom.visatrack.api.config.CrawlConfig;
import com.northtecom.visatrack.api.data.entity.VisaCheckeeCrawlHtml;
import com.northtecom.visatrack.api.data.repository.VisaCheckeeCrawlHtmlRepository;
@ -39,12 +40,21 @@ public class CrawlService {
// Crawl configuration, assigned once in the constructor.
private final CrawlConfig crawlConfig;
// ConfigCat client, assigned once in the constructor; queried for the "crawlInterval" setting.
private final ConfigCatClient configCatClient;
// Default passed to ConfigCat when reading "crawlInterval"; the resolved value is used as a sleep time in milliseconds.
private Integer crawlInterval = 2000;
/**
 * Creates the service and stores the collaborators passed in by constructor injection.
 *
 * @param visaCheckeeCrawlHtmlRepository repository stored on this instance
 * @param crawlConfig crawl configuration stored on this instance
 * @param configCatClient ConfigCat client stored on this instance; read by getCrawlIntervalValue()
 */
@Autowired
// NOTE(review): the next line looks like the earlier two-argument signature kept by the diff view;
// the three-argument signature after it appears to be the current one — confirm against the real file.
public CrawlService(VisaCheckeeCrawlHtmlRepository visaCheckeeCrawlHtmlRepository, CrawlConfig crawlConfig) {
public CrawlService(VisaCheckeeCrawlHtmlRepository visaCheckeeCrawlHtmlRepository, CrawlConfig crawlConfig,
ConfigCatClient configCatClient) {
this.visaCheckeeCrawlHtmlRepository = visaCheckeeCrawlHtmlRepository;
this.crawlConfig = crawlConfig;
this.configCatClient = configCatClient;
}
/**
 * Reads the pause between crawls from the "crawlInterval" ConfigCat setting.
 *
 * <p>The local {@code crawlInterval} field is passed to ConfigCat as the default and is also
 * returned when the fetched value is {@code null} or negative. The caller unboxes the result and
 * hands it to {@code Thread.sleep}, which throws IllegalArgumentException for negative values.
 *
 * @return the interval to sleep, in milliseconds
 */
protected Integer getCrawlIntervalValue() {
    Integer remoteInterval =
            this.configCatClient.getValue(Integer.class, "crawlInterval", this.crawlInterval);
    // Guard against a missing or misconfigured remote value before it reaches Thread.sleep.
    if (remoteInterval == null || remoteInterval < 0) {
        return this.crawlInterval;
    }
    return remoteInterval;
}
/**
* 爬取页面增加重试机制代理机制
@ -119,7 +129,11 @@ public class CrawlService {
SaveHtml(content, crawlUrl, crawlDate, crawlId);
}
Thread.sleep(2000);
Integer sleepTime = getCrawlIntervalValue();
log.info("Crawl url {} success, sleep {} ms", crawlUrl, sleepTime);
Thread.sleep(sleepTime);
return content;
}