修复爬虫,增加镜像仓库,支持推送 Docker 镜像到镜像仓库
This commit is contained in:
parent
d1d01c99f2
commit
ec9aee60c5
|
@ -7,3 +7,7 @@ docker部署代码
|
|||
`sudo docker rm usvisartrackapi`
|
||||
|
||||
`sudo docker run -p 8383:8282 --restart=unless-stopped -e APP_DATABASE_HOST="54.39.157.60" -e APP_DATABASE_PORT="3306" -e APP_DATABASE_NAME="usvisatrack" -e APP_DATABASE_USERNAME="root" -e APP_DATABASE_PASSWORD="ETNN0sqc1qMbgQaeGKWL" --name usvisartrackapi -d usvisartrackapi:0.0.2-snapshot`
|
||||
|
||||
docker tag usvisartrackapi:0.0.2-snapshot 54.39.157.60:86/library/usvisartrackapi:0.0.2-snapshot
|
||||
|
||||
docker push 54.39.157.60:86/library/usvisartrackapi:0.0.2-snapshot
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# yaml 配置示例
|
||||
version: '3'
|
||||
services:
|
||||
web:
|
||||
image: usvisartrackapi:0.0.2-snapshot
|
||||
      container_name: usvisartrackapi
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8383:8282"
|
||||
environment:
|
||||
- APP_DATABASE_HOST=54.39.157.60
|
||||
- APP_DATABASE_PORT=3306
|
||||
- APP_DATABASE_NAME=usvisatrack
|
||||
- APP_DATABASE_USERNAME=root
|
||||
- APP_DATABASE_PASSWORD=ETNN0sqc1qMbgQaeGKWL
|
||||
- CRAWL_USEPROXY=true
|
||||
|
|
@ -52,10 +52,6 @@ public class CrawlService {
|
|||
this.configCatClient = configCatClient;
|
||||
}
|
||||
|
||||
protected Integer getCrawlIntervalValue() {
|
||||
return this.configCatClient.getValue(Integer.class, "crawlInterval", this.crawlInterval);
|
||||
}
|
||||
|
||||
/**
|
||||
* 爬取页面增加重试机制代理机制
|
||||
*
|
||||
|
@ -65,6 +61,7 @@ public class CrawlService {
|
|||
*/
|
||||
public Document getDocument(String url) throws IOException, InterruptedException {
|
||||
Document document = null;
|
||||
log.info("Crawl url: {},Crawl config {}", url, this.crawlConfig);
|
||||
int i = 0;
|
||||
boolean success = false;
|
||||
while (i < crawlConfig.getRetryCount()) {
|
||||
|
@ -129,7 +126,7 @@ public class CrawlService {
|
|||
SaveHtml(content, crawlUrl, crawlDate, crawlId);
|
||||
}
|
||||
|
||||
Integer sleepTime = getCrawlIntervalValue();
|
||||
Integer sleepTime = this.crawlInterval;
|
||||
|
||||
log.info("Crawl url {} success, sleep {} ms", crawlUrl, sleepTime);
|
||||
|
||||
|
|
Loading…
Reference in New Issue