实现爬虫自动同步数据库 加入spring batch做未来的改动准备
This commit is contained in:
parent
60fd6c7d14
commit
2f412bee98
14
Install.md
14
Install.md
|
@ -19,3 +19,17 @@ docker tag usvisartrackapi:0.0.2-snapshot admin/usvisartrackapi:0.0.2-snapshot
|
|||
docker push pubuser/usvisartrackapi:0.0.2-snapshot
|
||||
|
||||
docker tag usvisartrackapi:0.0.2-snapshot 54.39.157.60:8092/library/usvisartrackapi:0.0.2-snapshot
|
||||
|
||||
docker compose 发布脚本
|
||||
|
||||
docker compose pull
|
||||
docker compose up -d --remove-orphans
|
||||
docker image prune -f
|
||||
|
||||
docker logs --tail=100 gitlab-runner -f
|
||||
|
||||
docker logs --tail=100 usvisatrackapi -f
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ param (
|
|||
[string]$DockerServerName = "usvisartrackapi",
|
||||
[string]$CodeServerPort = "8282",
|
||||
[string]$PublishServerPort = "8383",
|
||||
[string]$BuildVerison = "0.0.2-snapshot",
|
||||
[string]$BuildVerison = "0.0.5-snapshot",
|
||||
[string]$PushServer = "repo-docker.ossez.com",
|
||||
[string]$PushPath = "/docker-hub/"
|
||||
)
|
||||
|
|
17
pom.xml
17
pom.xml
|
@ -70,11 +70,6 @@
|
|||
<version>31.1-jre</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.configcat</groupId>
|
||||
<artifactId>configcat-java-client</artifactId>
|
||||
<version>7.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
|
@ -97,14 +92,20 @@
|
|||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-all</artifactId>
|
||||
<version>5.8.8</version>
|
||||
<version>5.8.9</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.flagsmith</groupId>
|
||||
<artifactId>flagsmith-java-client</artifactId>
|
||||
<version>5.0.6</version>
|
||||
</dependency>
|
||||
|
||||
<!-- hibernate enhancement -->
|
||||
<dependency>
|
||||
<groupId>com.vladmihalcea</groupId>
|
||||
<artifactId>hibernate-types-55</artifactId>
|
||||
<version>2.19.2</version>
|
||||
<version>2.20.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- DATABASE Client -->
|
||||
|
@ -156,7 +157,7 @@
|
|||
<dependency>
|
||||
<groupId>com.mailgun</groupId>
|
||||
<artifactId>mailgun-java</artifactId>
|
||||
<version>1.0.3</version>
|
||||
<version>1.0.4</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
package com.northtecom.visatrack.api.base.util;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/31/11:57
|
||||
* @Description:
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Action {
|
||||
void accept();
|
||||
|
||||
default Action andThen(Action after) {
|
||||
Objects.requireNonNull(after);
|
||||
return () -> {
|
||||
accept();
|
||||
after.accept();
|
||||
};
|
||||
}
|
||||
}
|
|
@ -5,14 +5,11 @@
|
|||
*/
|
||||
package com.northtecom.visatrack.api.base.util;
|
||||
|
||||
import com.configcat.ConfigCatClient;
|
||||
import com.configcat.User;
|
||||
import com.mailgun.api.v3.MailgunMessagesApi;
|
||||
import com.mailgun.client.MailgunClient;
|
||||
import com.mailgun.model.message.Message;
|
||||
import com.mailgun.model.message.MessageResponse;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
|
@ -33,20 +30,25 @@ public class EmailUtils {
|
|||
private static String emailSenderAddress = "info@usvisatrack.com";
|
||||
|
||||
|
||||
private ConfigCatClient configCatClient;
|
||||
// @Autowired
|
||||
// private FlagsmithClient flagsmithClient;
|
||||
private MailgunMessagesApi mailgunMessagesApi;
|
||||
|
||||
|
||||
public EmailUtils() {
|
||||
configCatClient = new ConfigCatClient("d5naCOKEsUeKSEB2aamvxg/JRdvJ42xcUKZGqnHq1vQgQ");
|
||||
User userObject = User.newBuilder().build(EMAIL_KEY_IDENTIFIER);
|
||||
// Unique identifier is required. Could be UserID, Email address or SessionID.
|
||||
String emailKey = configCatClient.getValue(String.class, EMAIL_KEY_NAME, userObject, StringUtils.EMPTY);
|
||||
emailSenderAddress = configCatClient.getValue(String.class, EMAIL_KEY_SENDER_NAME, userObject,
|
||||
emailSenderAddress);
|
||||
String emailKey = TryGetConfigByKey(EMAIL_KEY_NAME, "979fcedb0aa8bcdeab632bbf6baa74e0");
|
||||
emailSenderAddress = TryGetConfigByKey(EMAIL_KEY_SENDER_NAME, "updates@usvisatrack.com");
|
||||
mailgunMessagesApi = MailgunClient.config(emailKey).createApi(MailgunMessagesApi.class);
|
||||
}
|
||||
|
||||
private String TryGetConfigByKey(String key, String defaultValue) {
|
||||
// try {
|
||||
// Flags flags = flagsmithClient.getEnvironmentFlags();
|
||||
// return flags.getFeatureValue(key).toString();
|
||||
// } catch (FlagsmithClientError flagsmithClientError) {
|
||||
// flagsmithClientError.printStackTrace();
|
||||
// }
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send Test Email to check config and email sending API
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package com.northtecom.visatrack.api.config;
|
||||
|
||||
import com.configcat.ConfigCatClient;
|
||||
import com.flagsmith.FlagsmithClient;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
@ -26,10 +26,15 @@ public class ApplicationBeanConfig {
|
|||
private AuthenticationConfiguration authenticationConfiguration;
|
||||
|
||||
@Bean
|
||||
public ConfigCatClient configCatClient() throws Exception {
|
||||
return new ConfigCatClient("d5naCOKEsUeKSEB2aamvxg/JRdvJ42xcUKZGqnHq1vQgQ");
|
||||
public FlagsmithClient flagsmithClient() throws Exception {
|
||||
return FlagsmithClient
|
||||
.newBuilder()
|
||||
.setApiKey("bNRvdzMgcojGLCP6ts6fjB")
|
||||
.withApiUrl("https://flag.ossez.com/api/v1/")
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 用于配置 AuthenticationManager 实例
|
||||
*/
|
||||
|
|
|
@ -12,6 +12,7 @@ import com.northtecom.visatrack.api.service.impl.CaseVisaReportService;
|
|||
import com.northtecom.visatrack.api.service.impl.VisaCaseService;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
@ -37,6 +38,7 @@ public class CaseVisaReportReportController {
|
|||
private final CaseVisaReportService caseVisaReportService;
|
||||
private final VisaCaseService visaCaseService;
|
||||
|
||||
@Autowired
|
||||
public CaseVisaReportReportController(CaseVisaReportService caseVisaReportService,
|
||||
VisaCaseService visaCaseService) {
|
||||
this.caseVisaReportService = caseVisaReportService;
|
||||
|
|
|
@ -52,18 +52,49 @@ public class CrawlController {
|
|||
this.visaCaseService = visaCaseService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Crawl checkee report data
|
||||
*
|
||||
* @return boolean
|
||||
*/
|
||||
@PostMapping("/checkee_report/crawl")
|
||||
@Operation(summary = "爬取签证申请报表数据", description = "爬取签证申请报表数据")
|
||||
public boolean crawlCheckeeReport() {
|
||||
this.visaReportCheckeeService.crawlCheckeeReport();
|
||||
|
||||
@PostMapping("/checkee_visa/syncAllVisaDataFromCheckee")
|
||||
@Operation(summary = "同步所有数据", description = "同步所有数据")
|
||||
public Boolean syncAllVisaDataFromCheckee() {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = LocalDate.of(2018, 10, 1);
|
||||
this.visaReportCheckeeService.syncDataAndReport(endDate, startDate);
|
||||
return true;
|
||||
}
|
||||
|
||||
@PostMapping("/checkee_visa/syncLast3monthVisaDataFromCheckee")
|
||||
@Operation(summary = "同步最近3个月的数据", description = "同步最近3个月的数据")
|
||||
public Boolean syncLast3monthVisaDataFromCheckee() {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = endDate.minusMonths(3);
|
||||
this.visaReportCheckeeService.syncDataAndReport(endDate, startDate);
|
||||
return true;
|
||||
}
|
||||
|
||||
@PostMapping("/checkee_visa/syncLast3YearsVisaDataFromCheckee")
|
||||
@Operation(summary = "同步最近3年的数据", description = "同步最近3年的数据")
|
||||
public Boolean syncLast3YearsVisaDataFromCheckee() {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = endDate.minusYears(3);
|
||||
this.visaReportCheckeeService.syncDataAndReport(endDate, startDate);
|
||||
return true;
|
||||
}
|
||||
|
||||
@PostMapping("/checkee_visa/rebuildAllReport")
|
||||
@Operation(summary = "重建所有报表", description = "重建所有报表")
|
||||
public Boolean rebuildAllReport() {
|
||||
this.visaReportCheckeeService.rebuildAllReport();
|
||||
return true;
|
||||
}
|
||||
|
||||
@PostMapping("/checkee_visa/importNewCrawlData")
|
||||
@Operation(summary = "导入新的签证数据", description = "导入新的签证数据")
|
||||
public boolean importNewCrawlData() throws ParseException {
|
||||
this.visaCaseService.importNewCrawlData();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Import checkee crawl data to database
|
||||
*
|
||||
|
|
|
@ -14,21 +14,16 @@ import java.util.Date;
|
|||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/04/20:00
|
||||
* @Date: 2022/10/31/8:48
|
||||
* @Description:
|
||||
*/
|
||||
@Entity
|
||||
@Data
|
||||
@Table(name = "visa_checkee_crawl_html",uniqueConstraints = {
|
||||
@UniqueConstraint(columnNames={"crawl_key"})
|
||||
@Table(name = "crawl_cache", uniqueConstraints = {
|
||||
@UniqueConstraint(columnNames = {"crawl_key"})
|
||||
})
|
||||
@org.hibernate.annotations.Table(appliesTo = "visa_checkee_crawl_html", comment = "Visa case checkee crawl html")
|
||||
public class VisaCheckeeCrawlHtml extends BaseEntity<Long> {
|
||||
|
||||
public static final String CRAWL_KEY_REPORT_LIST = "report_list";
|
||||
public static final String CRAWL_KEY_REPORT_DETAIL_LIST = "report_detail_list";
|
||||
public static final String CRAWL_KEY_VISA_DETAIL = "visa_detail";
|
||||
public static final String CRAWL_KEY_VISA_UPDATE = "visa_update";
|
||||
@org.hibernate.annotations.Table(appliesTo = "crawl_cache", comment = "Crawl cache")
|
||||
public class CrawlCache extends BaseEntity<Long> {
|
||||
/**
|
||||
* Crawl key
|
||||
*/
|
||||
|
@ -42,8 +37,8 @@ public class VisaCheckeeCrawlHtml extends BaseEntity<Long> {
|
|||
/**
|
||||
* Crawl html content
|
||||
*/
|
||||
@Column(name = "content", columnDefinition = "LONGTEXT comment 'crawl html content'")
|
||||
private String content;
|
||||
@Column(name = "oss_path", columnDefinition = "varchar(500) comment 'Oss path'")
|
||||
private String OssPath;
|
||||
/**
|
||||
* Cache days
|
||||
*/
|
|
@ -80,7 +80,7 @@ public class VisaCase extends BaseEntity<Long> {
|
|||
/**
|
||||
* Note
|
||||
*/
|
||||
@Column(name = "note", columnDefinition = "varchar(2000) COMMENT 'note'")
|
||||
@Column(name = "note", columnDefinition = "Text COMMENT 'note'")
|
||||
private String note;
|
||||
/**
|
||||
* Visa interview date
|
||||
|
|
|
@ -43,24 +43,24 @@ public class VisaCheckeeCrawlData extends BaseEntity<Long> {
|
|||
private String status;
|
||||
@Column(name = "complete_date", columnDefinition = "Date COMMENT 'complete date'")
|
||||
private LocalDate completeDate;
|
||||
@Column(name = "note", columnDefinition = "varchar(2000) COMMENT 'note'")
|
||||
@Column(name = "note", columnDefinition = "TEXT COMMENT 'note'")
|
||||
private String note;
|
||||
@Column(name = "last_name", columnDefinition = "varchar(50) COMMENT 'last name'")
|
||||
@Column(name = "last_name", columnDefinition = "varchar(100) COMMENT 'last name'")
|
||||
private String lastName;
|
||||
@Column(name = "first_name", columnDefinition = "varchar(50) COMMENT 'first name'")
|
||||
@Column(name = "first_name", columnDefinition = "varchar(100) COMMENT 'first name'")
|
||||
private String firstName;
|
||||
@Column(name = "university", columnDefinition = "varchar(100) COMMENT 'university'")
|
||||
private String university;
|
||||
@Column(name = "degree", columnDefinition = "varchar(50) COMMENT 'degree'")
|
||||
@Column(name = "degree", columnDefinition = "varchar(200) COMMENT 'degree'")
|
||||
private String degree;
|
||||
@Column(name = "employer", columnDefinition = "varchar(50) COMMENT 'employer'")
|
||||
@Column(name = "employer", columnDefinition = "varchar(100) COMMENT 'employer'")
|
||||
private String employer;
|
||||
@Column(name = "job_title", columnDefinition = "varchar(50) COMMENT 'job title'")
|
||||
@Column(name = "job_title", columnDefinition = "varchar(100) COMMENT 'job title'")
|
||||
private String jobTitle;
|
||||
@Column(name = "years_in_usa", columnDefinition = "varchar(50) COMMENT 'years in usa'")
|
||||
@Column(name = "years_in_usa", columnDefinition = "varchar(100) COMMENT 'years in usa'")
|
||||
private String yearsInUsa;
|
||||
@Column(name = "country", columnDefinition = "varchar(50) COMMENT 'country'")
|
||||
@Column(name = "country", columnDefinition = "varchar(100) COMMENT 'country'")
|
||||
private String country;
|
||||
@Column(name = "part_email", columnDefinition = "varchar(50) COMMENT 'part email'")
|
||||
@Column(name = "part_email", columnDefinition = "varchar(100) COMMENT 'part email'")
|
||||
private String partEmail;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
package com.northtecom.visatrack.api.data.repository;
|
||||
|
||||
import com.northtecom.visatrack.api.data.entity.CrawlCache;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.data.repository.query.Param;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/04/20:05
|
||||
* @Description:
|
||||
*/
|
||||
public interface CrawlCacheRepository extends JpaRepository<CrawlCache, Long> {
|
||||
@Query("select vh from CrawlCache vh where vh.crawlKey = :crawlKey order by vh.crawlDate desc")
|
||||
Optional<CrawlCache> findLatestCrawlCache(@Param("crawlKey") String crawlKey);
|
||||
}
|
|
@ -30,4 +30,9 @@ public interface VisaCheckeeCrawlDataRepository extends JpaRepository<VisaChecke
|
|||
@Query(value = "Select * from usvisatrack.visa_checkee_crawl_data where (first_name is null or first_name = '')" +
|
||||
" or (part_email is null or part_email = '')", nativeQuery = true)
|
||||
List<VisaCheckeeCrawlData> QueryNotDetail();
|
||||
|
||||
@Query(value = "Select * from usvisatrack.visa_checkee_crawl_data where case_num not in (SELECT v" +
|
||||
".ref_crawl_case_number from usvisatrack.visa_case v)", nativeQuery = true)
|
||||
List<VisaCheckeeCrawlData> QueryNewCrawlData();
|
||||
|
||||
}
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
package com.northtecom.visatrack.api.data.repository;
|
||||
|
||||
import com.northtecom.visatrack.api.data.entity.VisaCheckeeCrawlHtml;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.data.repository.query.Param;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/04/20:05
|
||||
* @Description:
|
||||
*/
|
||||
public interface VisaCheckeeCrawlHtmlRepository extends JpaRepository<VisaCheckeeCrawlHtml, Long> {
|
||||
@Query("select vh from VisaCheckeeCrawlHtml vh where vh.crawlKey = :crawlKey order by vh.crawlDate desc")
|
||||
Optional<VisaCheckeeCrawlHtml> findLatestCrawlHtml(@Param("crawlKey") String crawlKey);
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
package com.northtecom.visatrack.api.schedule;
|
||||
|
||||
import com.northtecom.visatrack.api.service.impl.VisaCaseService;
|
||||
import com.northtecom.visatrack.api.service.impl.VisaReportCheckeeService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/15/9:01
|
||||
* @Description:
|
||||
*/
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class VisaCaseSyncTask {
|
||||
|
||||
private final VisaCaseService visaCaseService;
|
||||
private final VisaReportCheckeeService visaReportCheckeeService;
|
||||
|
||||
|
||||
public VisaCaseSyncTask(VisaCaseService visaCaseService, VisaReportCheckeeService visaReportCheckeeService) {
|
||||
this.visaCaseService = visaCaseService;
|
||||
this.visaReportCheckeeService = visaReportCheckeeService;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Syncs the last 3 months of visa data and updates the reports. Runs once every hour, on the hour.
|
||||
*/
|
||||
@Scheduled(cron = "0 0 0/1 * * ?")
|
||||
public void syncLast3monthVisaDataFromCheckee() {
|
||||
log.info("同步最近半年的签证数据,并更新报表 触发于:{}", LocalDateTime.now());
|
||||
try {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = endDate.minusMonths(3);
|
||||
this.visaReportCheckeeService.syncDataAndReport(endDate, startDate);
|
||||
} catch (Exception e) {
|
||||
log.error("同步最近半年的签证数据,并更新报表 异常:{}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Syncs the last 3 years of visa data and updates the reports. Runs once a day at 03:00.
|
||||
*/
|
||||
@Scheduled(cron = "0 0 3 * * ?")
|
||||
public void syncLast3YearsVisaDataFromCheckee() {
|
||||
log.info("同步最近3年的签证数据,并更新报表 触发于:{}", LocalDateTime.now());
|
||||
try {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = endDate.minusYears(3);
|
||||
this.visaReportCheckeeService.syncDataAndReport(endDate, startDate);
|
||||
} catch (Exception e) {
|
||||
log.error("同步最近3年的签证数据 异常:{}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Syncs all visa data from 2018-10-01 up to today. Runs at 01:00 on the 1st day of every month.
|
||||
*/
|
||||
@Scheduled(cron = "0 0 1 1 * ?")
|
||||
public void syncAllVisaDataFromCheckee() {
|
||||
log.info("同步最近所有的签证数据,并更新报表 触发于:{}", LocalDateTime.now());
|
||||
try {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = LocalDate.of(2018, 10, 1);
|
||||
this.visaReportCheckeeService.syncDataAndReport(endDate, startDate);
|
||||
} catch (Exception e) {
|
||||
log.error("同步最近所有的签证数据,并更新报表 异常:{}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Rebuilds the report data. Runs at minute 20 and minute 40 of every hour.
|
||||
*/
|
||||
@Scheduled(cron = "0 20,40 * * * ?")
|
||||
public void rebuildReport() {
|
||||
log.info("重建报表数据 触发于:{}", LocalDateTime.now());
|
||||
try {
|
||||
this.visaReportCheckeeService.rebuildAllReport();
|
||||
} catch (Exception e) {
|
||||
log.error("重建报表数据 异常:{}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
package com.northtecom.visatrack.api.schedule;
|
||||
|
||||
import com.northtecom.visatrack.api.service.impl.VisaCaseService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/15/9:01
|
||||
* @Description:
|
||||
*/
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class VisaReportTask {
|
||||
|
||||
private final VisaCaseService visaCaseService;
|
||||
|
||||
public VisaReportTask(VisaCaseService visaCaseService) {
|
||||
this.visaCaseService = visaCaseService;
|
||||
}
|
||||
|
||||
// @Scheduled(cron = "0/3 * * * * *")
|
||||
public void test_3() {
|
||||
log.info("test_3 触发于:{}", LocalDateTime.now());
|
||||
try {
|
||||
Thread.sleep(5000);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算最近12个月的签证统计(2小时更新一次)
|
||||
*/
|
||||
// @Scheduled(initialDelay = 20000, fixedDelay = 1000 * 60 * 60 * 2)
|
||||
public void caculateLast12MonthsMainReport() {
|
||||
log.info("计算最近12个月的签证统计 触发于:{}", LocalDateTime.now());
|
||||
try {
|
||||
visaCaseService.calculateMainReport(12);
|
||||
} catch (Exception e) {
|
||||
log.error("计算最近12个月的签证统计 异常:{}", e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -23,6 +23,7 @@ import java.util.stream.Collectors;
|
|||
@Service
|
||||
public class AwsSysFileService {
|
||||
|
||||
public static final String DEFAULT_BUCKET_NAME = "usvsiatrackcrawl";
|
||||
|
||||
private final AmazonS3 s3Client;
|
||||
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
package com.northtecom.visatrack.api.service.impl;
|
||||
|
||||
import cn.hutool.core.date.DateUtil;
|
||||
import com.northtecom.visatrack.api.config.CrawlConfig;
|
||||
import com.northtecom.visatrack.api.data.entity.CrawlCache;
|
||||
import com.northtecom.visatrack.api.data.entity.VisaCheckeeCrawlData;
|
||||
import com.northtecom.visatrack.api.data.entity.VisaCheckeeCrawlHtml;
|
||||
import com.northtecom.visatrack.api.data.repository.VisaCheckeeCrawlHtmlRepository;
|
||||
import com.northtecom.visatrack.api.data.repository.CrawlCacheRepository;
|
||||
import com.northtecom.visatrack.api.service.dto.CrawlHtml;
|
||||
import com.northtecom.visatrack.api.service.dto.VisaCheckeeData;
|
||||
import com.northtecom.visatrack.api.service.dto.VisaReportCheckeeData;
|
||||
import com.northtecom.visatrack.api.service.rules.CrawlToOssRule;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.Connection;
|
||||
|
@ -17,13 +16,11 @@ import org.jsoup.nodes.Element;
|
|||
import org.jsoup.select.Elements;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
|
@ -37,20 +34,23 @@ import java.util.Optional;
|
|||
@Service
|
||||
@Slf4j
|
||||
public class CrawlService {
|
||||
public static final String URL_CHECKEE_REPORT = "https://www.checkee.info/index.php";
|
||||
public static final String URL_CHECKEE_REPORT_DETAIL = "https://www.checkee.info/main.php?dispdate=";
|
||||
public static final String URL_CHECKEE_VISA_DETAIL = "https://checkee.info/personal_detail.php?casenum=";
|
||||
public static final String URL_CHECKEE_VISA_UPDATE = "https://checkee.info/update.php?casenum=";
|
||||
private final VisaCheckeeCrawlHtmlRepository visaCheckeeCrawlHtmlRepository;
|
||||
|
||||
|
||||
private final CrawlCacheRepository crawlCacheRepository;
|
||||
private final CrawlConfig crawlConfig;
|
||||
private final AwsSysFileService awsSysFileService;
|
||||
|
||||
|
||||
private Integer crawlInterval = 2000;
|
||||
|
||||
@Autowired
|
||||
public CrawlService(VisaCheckeeCrawlHtmlRepository visaCheckeeCrawlHtmlRepository, CrawlConfig crawlConfig) {
|
||||
this.visaCheckeeCrawlHtmlRepository = visaCheckeeCrawlHtmlRepository;
|
||||
public CrawlService(
|
||||
CrawlCacheRepository crawlCacheRepository, CrawlConfig crawlConfig,
|
||||
AwsSysFileService awsSysFileService) {
|
||||
|
||||
this.crawlCacheRepository = crawlCacheRepository;
|
||||
this.crawlConfig = crawlConfig;
|
||||
this.awsSysFileService = awsSysFileService;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -60,7 +60,7 @@ public class CrawlService {
|
|||
* @return {@link Document}
|
||||
* @throws IOException ioexception
|
||||
*/
|
||||
public Document getDocument(String url) throws IOException, InterruptedException {
|
||||
public Document getDocument(String url) throws InterruptedException {
|
||||
Document document = null;
|
||||
log.info("Crawl url: {},Crawl config {}", url, this.crawlConfig);
|
||||
int i = 0;
|
||||
|
@ -90,153 +90,106 @@ public class CrawlService {
|
|||
return document;
|
||||
}
|
||||
|
||||
public CrawlHtml crawlHtmlByCrawlUrl(String crawlUrl) throws IOException,
|
||||
InterruptedException {
|
||||
|
||||
CrawlHtml crawlHtml = new CrawlHtml();
|
||||
|
||||
Date crawlDate = new Date();
|
||||
|
||||
crawlHtml.setUrl(crawlUrl);
|
||||
|
||||
Integer crawlCacheDay = caculateCrawlCacheDay(crawlUrl);
|
||||
|
||||
log.info("Start to read crawl url : {} from db", crawlUrl, crawlDate);
|
||||
|
||||
Optional<VisaCheckeeCrawlHtml> latestCrawlHtml =
|
||||
visaCheckeeCrawlHtmlRepository.findLatestCrawlHtml(crawlUrl);
|
||||
|
||||
if (latestCrawlHtml.isPresent() && DateUtil.offsetDay(latestCrawlHtml.get().getCrawlDate(), crawlCacheDay).after(crawlDate)) {
|
||||
log.info("crawl url cache in database,use db value");
|
||||
crawlHtml.setCrawlTime(latestCrawlHtml.get().getUpdateTime());
|
||||
crawlHtml.setHtml(latestCrawlHtml.get().getContent());
|
||||
return crawlHtml;
|
||||
}
|
||||
|
||||
Long crawlId = null;
|
||||
|
||||
if (latestCrawlHtml.isPresent()) {
|
||||
crawlId = latestCrawlHtml.get().getId();
|
||||
}
|
||||
|
||||
log.info("Start to crawl url {} at {}", crawlUrl, crawlDate);
|
||||
|
||||
String content = "";
|
||||
public CrawlHtml crawlHtml(String crawlUrl) {
|
||||
|
||||
try {
|
||||
Document doc = getDocument(crawlUrl);
|
||||
if (!doc.select("title").text().equals("One moment, please...")) {
|
||||
content = doc.html();
|
||||
CrawlToOssRule crawlToOssRule = CrawlToOssRule.of(AwsSysFileService.DEFAULT_BUCKET_NAME, crawlUrl);
|
||||
|
||||
CrawlHtml crawlHtml = new CrawlHtml();
|
||||
|
||||
crawlHtml.setUrl(crawlUrl);
|
||||
|
||||
Optional<CrawlCache> latestCrawlHtml =
|
||||
crawlCacheRepository.findLatestCrawlCache(crawlUrl);
|
||||
|
||||
// 从数据库中读取到缓存html,并且没有过期
|
||||
if (latestCrawlHtml.isPresent() && !crawlToOssRule.checkIsExpired(latestCrawlHtml.get().getCrawlDate())) {
|
||||
log.info("crawl url cache in database,use db value,cache id is {}", latestCrawlHtml.get().getId());
|
||||
crawlHtml.setCrawlTime(latestCrawlHtml.get().getUpdateTime());
|
||||
String html = ReadFromOss(latestCrawlHtml.get().getOssPath(), crawlToOssRule);
|
||||
crawlHtml.setHtml(html);
|
||||
return crawlHtml;
|
||||
}
|
||||
|
||||
Long crawlId = null;
|
||||
|
||||
if (latestCrawlHtml.isPresent()) {
|
||||
crawlId = latestCrawlHtml.get().getId();
|
||||
}
|
||||
|
||||
log.info("Start to crawl url {} at {}", crawlUrl, crawlToOssRule.getUrl());
|
||||
|
||||
String content = "";
|
||||
|
||||
try {
|
||||
Document doc = getDocument(crawlUrl);
|
||||
if (!doc.select("title").text().equals("One moment, please...")) {
|
||||
content = doc.html();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
throw e;
|
||||
}
|
||||
|
||||
if (StringUtils.isNotBlank(content)) {
|
||||
SaveCacheHtml(content, crawlId, crawlToOssRule);
|
||||
}
|
||||
|
||||
Integer sleepTime = this.crawlInterval;
|
||||
|
||||
log.info("Crawl url {} success, sleep {} ms", crawlUrl, sleepTime);
|
||||
|
||||
Thread.sleep(sleepTime);
|
||||
|
||||
crawlHtml.setCrawlTime(crawlToOssRule.getCrawlDate());
|
||||
crawlHtml.setHtml(content);
|
||||
|
||||
return crawlHtml;
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
throw e;
|
||||
}
|
||||
|
||||
if (StringUtils.isNotBlank(content)) {
|
||||
SaveHtml(content, crawlUrl, crawlDate, crawlId, crawlCacheDay);
|
||||
}
|
||||
|
||||
Integer sleepTime = this.crawlInterval;
|
||||
|
||||
log.info("Crawl url {} success, sleep {} ms", crawlUrl, sleepTime);
|
||||
|
||||
Thread.sleep(sleepTime);
|
||||
|
||||
crawlHtml.setCrawlTime(crawlDate);
|
||||
crawlHtml.setHtml(content);
|
||||
|
||||
return crawlHtml;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 计算页面缓存天数
|
||||
* 如果是主页面报表 缓存一天
|
||||
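* For a monthly list page, by age of the month: up to 2 months old is cached 1 day, up to 6 months 7 days, up to 24 months 30 days, up to 72 months 365 days, and anything older 3 years (3 * 365 days)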
|
||||
* 个人信息明细页面缓存 1年
|
||||
*
|
||||
* @param crawlUrl 爬行url
|
||||
* @return {@link Integer}
|
||||
*/
|
||||
private Integer caculateCrawlCacheDay(String crawlUrl) {
|
||||
if (crawlUrl.contains(URL_CHECKEE_REPORT)) {
|
||||
return 1;
|
||||
} else if (crawlUrl.contains(URL_CHECKEE_VISA_DETAIL) || crawlUrl.contains(URL_CHECKEE_VISA_UPDATE)) {
|
||||
return 365;
|
||||
} else if (crawlUrl.contains(URL_CHECKEE_REPORT_DETAIL)) {
|
||||
String month = crawlUrl.substring(crawlUrl.lastIndexOf("=") + 1);
|
||||
LocalDate localDate = LocalDate.parse(month + "-01");
|
||||
LocalDate now = LocalDate.now();
|
||||
int monthDiff =
|
||||
now.getYear() * 12 + now.getMonthValue() - (localDate.getYear() * 12 + localDate.getMonthValue());
|
||||
if (monthDiff <= 2) {
|
||||
return 1;
|
||||
} else if (monthDiff <= 6) {
|
||||
return 7;
|
||||
} else if (monthDiff <= 24) {
|
||||
return 30;
|
||||
} else if (monthDiff <= 72) {
|
||||
return 365;
|
||||
} else {
|
||||
return 3 * 365;
|
||||
}
|
||||
} else {
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
private void SaveHtml(String html, String crawlUrl, Date crawlDate, Long crawlId, Integer crawlCacheDay) {
|
||||
|
||||
VisaCheckeeCrawlHtml visaCheckeeCrawlHtml = new VisaCheckeeCrawlHtml();
|
||||
if (crawlId != null) {
|
||||
visaCheckeeCrawlHtml =
|
||||
visaCheckeeCrawlHtmlRepository.findById(crawlId).orElse(new VisaCheckeeCrawlHtml());
|
||||
}
|
||||
visaCheckeeCrawlHtml.setCrawlKey(crawlUrl);
|
||||
visaCheckeeCrawlHtml.setCrawlDate(crawlDate);
|
||||
visaCheckeeCrawlHtml.setContent(html);
|
||||
visaCheckeeCrawlHtml.setCacheDays(crawlCacheDay);
|
||||
log.info("Before save crawl Key {} length {}", visaCheckeeCrawlHtml.getCrawlKey(),
|
||||
visaCheckeeCrawlHtml.getCrawlKey().length());
|
||||
visaCheckeeCrawlHtmlRepository.saveAndFlush(visaCheckeeCrawlHtml);
|
||||
}
|
||||
|
||||
public List<VisaReportCheckeeData> parseCheckeeReport(String crawlContent) {
|
||||
Document doc = Jsoup.parse(crawlContent);
|
||||
Elements tables = doc.select("table");
|
||||
Element table = tables.get(2);
|
||||
Elements trs = table.select("tr");
|
||||
List<VisaReportCheckeeData> visaReportCheckees = new ArrayList<>();
|
||||
for (int i = 1; i < trs.size(); i++) {
|
||||
VisaReportCheckeeData visaReportCheckee = parseTrToCheckeeReport(trs.get(i));
|
||||
visaReportCheckees.add(visaReportCheckee);
|
||||
}
|
||||
return visaReportCheckees;
|
||||
}
|
||||
|
||||
|
||||
private VisaReportCheckeeData parseTrToCheckeeReport(Element tr) {
|
||||
VisaReportCheckeeData visaReportCheckee = new VisaReportCheckeeData();
|
||||
Elements tds = tr.select("td");
|
||||
visaReportCheckee.setMonth(tds.get(1).text());
|
||||
visaReportCheckee.setPendingCaseCount(Integer.parseInt(tds.get(2).text()));
|
||||
visaReportCheckee.setClearCaseCount(Integer.parseInt(tds.get(3).text()));
|
||||
visaReportCheckee.setRejectCaseCount(Integer.parseInt(tds.get(4).text()));
|
||||
visaReportCheckee.setTotalCaseCount(Integer.parseInt(tds.get(5).text()));
|
||||
visaReportCheckee.setAveWaitingDaysForCompleteCases(parseAveWaitingDaysForCompleteCasesTd(tds.get(6)));
|
||||
return visaReportCheckee;
|
||||
}
|
||||
|
||||
private Integer parseAveWaitingDaysForCompleteCasesTd(Element td) {
|
||||
if (td.text().equals("-") || td.text().equals("")) {
|
||||
log.error("Error when crawlHtml: {}", e.getMessage());
|
||||
return null;
|
||||
} else {
|
||||
return Integer.parseInt(td.text());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private String ReadFromOss(String ossPath, CrawlToOssRule crawlToOssRule) {
|
||||
String html = "";
|
||||
try {
|
||||
html = awsSysFileService.readHtmlFile(crawlToOssRule.getFileName(), crawlToOssRule.getBucket(),
|
||||
crawlToOssRule.getFolder());
|
||||
} catch (Exception e) {
|
||||
log.error("Read from oss error: {}", e.getMessage());
|
||||
}
|
||||
return html;
|
||||
}
|
||||
|
||||
private void SaveCacheHtml(String html, Long crawlId, CrawlToOssRule crawlToOssRule) throws IOException {
|
||||
|
||||
saveHtmlToOss(html, crawlToOssRule);
|
||||
|
||||
CrawlCache crawlCache = new CrawlCache();
|
||||
if (crawlId != null) {
|
||||
crawlCache =
|
||||
crawlCacheRepository.findById(crawlId).orElse(new CrawlCache());
|
||||
}
|
||||
crawlCache.setCrawlKey(crawlToOssRule.getUrl());
|
||||
crawlCache.setCrawlDate(crawlToOssRule.getCrawlDate());
|
||||
crawlCache.setOssPath(crawlToOssRule.getOssPath());
|
||||
crawlCache.setCacheDays(crawlToOssRule.getCacheDays());
|
||||
crawlCacheRepository.save(crawlCache);
|
||||
}
|
||||
|
||||
private void saveHtmlToOss(String html, CrawlToOssRule crawlToOssRule) throws IOException {
|
||||
this.awsSysFileService.uploadHtmlFile(
|
||||
crawlToOssRule.getFileName(),
|
||||
crawlToOssRule.getBucket(),
|
||||
crawlToOssRule.getFolder(),
|
||||
html,
|
||||
crawlToOssRule.getUrl());
|
||||
}
|
||||
|
||||
|
||||
public List<VisaCheckeeData> parseCheckeeVisa(String crawlCheckeeVisaList) {
|
||||
Document doc = Jsoup.parse(crawlCheckeeVisaList);
|
||||
Elements tables = doc.select("table");
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package com.northtecom.visatrack.api.service.impl;
|
||||
|
||||
import com.northtecom.visatrack.api.base.data.BaseEntity;
|
||||
import com.northtecom.visatrack.api.base.exception.BaseException;
|
||||
import com.northtecom.visatrack.api.base.util.Action;
|
||||
import com.northtecom.visatrack.api.base.web.Status;
|
||||
import com.northtecom.visatrack.api.controller.vo.VisaCaseSearch;
|
||||
import com.northtecom.visatrack.api.controller.vo.VisaSubmitRequest;
|
||||
|
@ -8,16 +10,20 @@ import com.northtecom.visatrack.api.controller.vo.VisaTrackUserDetail;
|
|||
import com.northtecom.visatrack.api.data.entity.CaseVisaReport;
|
||||
import com.northtecom.visatrack.api.data.entity.User;
|
||||
import com.northtecom.visatrack.api.data.entity.VisaCase;
|
||||
import com.northtecom.visatrack.api.data.entity.VisaCheckeeCrawlData;
|
||||
import com.northtecom.visatrack.api.data.repository.CaseVisaReportRepository;
|
||||
import com.northtecom.visatrack.api.data.repository.UserRepository;
|
||||
import com.northtecom.visatrack.api.data.repository.VisaCaseRepository;
|
||||
import com.northtecom.visatrack.api.data.repository.VisaCheckeeCrawlDataRepository;
|
||||
import com.northtecom.visatrack.api.data.spec.DateRange;
|
||||
import com.northtecom.visatrack.api.data.spec.VisaCaseSpecification;
|
||||
import com.northtecom.visatrack.api.service.dto.CaseAvgWaitDayReport;
|
||||
import com.northtecom.visatrack.api.service.dto.CaseStatusSummaryReport;
|
||||
import com.northtecom.visatrack.api.service.dto.CrawlHtml;
|
||||
import com.northtecom.visatrack.api.service.dto.VisaReportCheckeeData;
|
||||
import com.northtecom.visatrack.api.service.enums.VisaEntry;
|
||||
import com.northtecom.visatrack.api.service.enums.VisaStatus;
|
||||
import com.northtecom.visatrack.api.service.rules.CrawlToOssRule;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
@ -29,7 +35,6 @@ import org.springframework.security.access.prepost.PreAuthorize;
|
|||
import org.springframework.security.core.Authentication;
|
||||
import org.springframework.security.core.context.SecurityContextHolder;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
import java.time.Instant;
|
||||
|
@ -41,6 +46,7 @@ import java.util.ArrayList;
|
|||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
|
@ -56,14 +62,19 @@ public class VisaCaseService {
|
|||
private final VisaCaseRepository visaCaseRepository;
|
||||
private final CaseVisaReportRepository caseVisaReportRepository;
|
||||
private final UserRepository userRepository;
|
||||
private final CrawlService crawlService;
|
||||
private final VisaCheckeeCrawlDataRepository visaCheckeeCrawlDataRepository;
|
||||
|
||||
|
||||
/**
 * Creates the service with its collaborators (constructor injection).
 *
 * @param visaCaseRepository             repository for visa cases
 * @param caseVisaReportRepository       repository for case visa reports
 * @param userRepository                 repository for users
 * @param crawlService                   crawler used to fetch and parse Checkee pages
 * @param visaCheckeeCrawlDataRepository repository for crawled Checkee rows
 */
@Autowired
public VisaCaseService(VisaCaseRepository visaCaseRepository,
                       CaseVisaReportRepository caseVisaReportRepository, UserRepository userRepository,
                       CrawlService crawlService, VisaCheckeeCrawlDataRepository visaCheckeeCrawlDataRepository) {
    this.visaCaseRepository = visaCaseRepository;
    this.caseVisaReportRepository = caseVisaReportRepository;
    this.userRepository = userRepository;
    this.crawlService = crawlService;
    this.visaCheckeeCrawlDataRepository = visaCheckeeCrawlDataRepository;
}
|
||||
|
||||
|
||||
|
@ -213,7 +224,6 @@ public class VisaCaseService {
|
|||
|
||||
}
|
||||
|
||||
@Transactional
|
||||
private void saveCaseVisaReport(VisaReportCheckeeData visaReportCheckeeData) {
|
||||
Optional<CaseVisaReport> caseVisaReport =
|
||||
caseVisaReportRepository.findByMonth(visaReportCheckeeData.getMonth());
|
||||
|
@ -406,4 +416,171 @@ public class VisaCaseService {
|
|||
|
||||
return visaCaseRepository.queryFavouredVisaCaseByUserId(userDetail.getId());
|
||||
}
|
||||
|
||||
public void importNewCrawlData() {
|
||||
List<VisaCheckeeCrawlData> newCrawlData =
|
||||
this.visaCheckeeCrawlDataRepository.QueryNewCrawlData();
|
||||
|
||||
getCrawlDataDetail(newCrawlData);
|
||||
|
||||
List<VisaCase> visaCaseList = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < newCrawlData.size(); i++) {
|
||||
|
||||
VisaCheckeeCrawlData crawlData = newCrawlData.get(i);
|
||||
|
||||
VisaCase savedVisaCase = saveCheckeeVisaDataToVisa(crawlData);
|
||||
|
||||
if (savedVisaCase != null) {
|
||||
visaCaseList.add(savedVisaCase);
|
||||
}
|
||||
|
||||
log.info("Crawl checkee visa data: {}/{}", i + 1, savedVisaCase);
|
||||
}
|
||||
|
||||
log.info("Start to batch save crawl checkee visa data total: {}", visaCaseList.size());
|
||||
|
||||
BatchSaveVisaCase(visaCaseList);
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void getCrawlDataDetail(List<VisaCheckeeCrawlData> notDetailData) {
|
||||
log.info("Start to save crawl checkee not detail visa data total: {}", notDetailData.size());
|
||||
|
||||
for (int i = 0; i < notDetailData.size(); i++) {
|
||||
log.info("Start to save crawl checkee not detail visa data: {}/{}", i + 1, notDetailData.size());
|
||||
|
||||
VisaCheckeeCrawlData visaCheckeeCrawlData = notDetailData.get(i);
|
||||
|
||||
try {
|
||||
CrawlHtml crawlCheckeeVisaDetail =
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_VISA_DETAIL + visaCheckeeCrawlData.getCaseNum());
|
||||
this.crawlService.parseCheckeeVisaDetailDataAndFill(crawlCheckeeVisaDetail.getHtml(),
|
||||
visaCheckeeCrawlData);
|
||||
} catch (Exception e) {
|
||||
log.error("Crawl checkee visa detail error: caseNum : {} error: {}", visaCheckeeCrawlData.getCaseNum()
|
||||
, e.getMessage());
|
||||
}
|
||||
|
||||
try {
|
||||
CrawlHtml crawlCheckeeVisaUpdate =
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_VISA_UPDATE + visaCheckeeCrawlData.getCaseNum());
|
||||
this.crawlService.parseCheckeeVisaUpdateDataAndFill(crawlCheckeeVisaUpdate.getHtml(),
|
||||
visaCheckeeCrawlData);
|
||||
} catch (Exception e) {
|
||||
log.error("Crawl checkee visa update email error: caseNum : {} error: {}",
|
||||
visaCheckeeCrawlData.getCaseNum(), e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
log.info("End to save crawl checkee not detail visa data total: {}", notDetailData.size());
|
||||
|
||||
batchSaveDataItems(
|
||||
notDetailData,
|
||||
50,
|
||||
visaCheckeeCrawlDataRepository::saveAllAndFlush,
|
||||
visaCheckeeCrawlDataRepository::saveAndFlush,
|
||||
visaCheckeeCrawlDataRepository::flush);
|
||||
}
|
||||
|
||||
private <T extends BaseEntity<Long>> void batchSaveDataItems(List<T> saveItems, Integer batchSize,
|
||||
Function<List<T>, List<T>> saveFunction,
|
||||
Function<T, T> saveSingle, Action flushAction) {
|
||||
|
||||
List<T> batchItems = new ArrayList<>();
|
||||
for (int i = 0; i < saveItems.size(); i++) {
|
||||
batchItems.add(saveItems.get(i));
|
||||
if (batchItems.size() >= batchSize) {
|
||||
batchSaveDataItem(batchItems, saveFunction, saveSingle, flushAction);
|
||||
}
|
||||
}
|
||||
if (batchItems.size() > 0) {
|
||||
batchSaveDataItem(batchItems, saveFunction, saveSingle, flushAction);
|
||||
}
|
||||
}
|
||||
|
||||
private <T extends BaseEntity<Long>> void batchSaveDataItem(List<T> batchItems, Function<List<T>, List<T>> saveAll,
|
||||
Function<T, T> saveSingle, Action flushAction) {
|
||||
try {
|
||||
saveAll.apply(batchItems);
|
||||
} catch (Exception e) {
|
||||
log.error("Batch save items error: {}", e.getMessage());
|
||||
flushAction.accept();
|
||||
for (T batchItem : batchItems) {
|
||||
try {
|
||||
saveSingle.apply(batchItem);
|
||||
} catch (Exception e1) {
|
||||
log.error("Save item error: {},Id is : {}", e1.getMessage(),
|
||||
batchItem.getId());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
batchItems.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void BatchSaveVisaCase(List<VisaCase> visaCaseList) {
|
||||
List<VisaCase> batchItems = new ArrayList<>();
|
||||
for (int i = 0; i < visaCaseList.size(); i++) {
|
||||
batchItems.add(visaCaseList.get(i));
|
||||
if (batchItems.size() >= 50) {
|
||||
batchSave(batchItems);
|
||||
}
|
||||
}
|
||||
if (batchItems.size() > 0) {
|
||||
batchSave(batchItems);
|
||||
}
|
||||
}
|
||||
|
||||
private void batchSave(List<VisaCase> batchItems) {
|
||||
try {
|
||||
visaCaseRepository.saveAll(batchItems);
|
||||
} catch (Exception e) {
|
||||
log.error("Batch save visa case error: {}", e.getMessage());
|
||||
for (VisaCase visaCase : batchItems) {
|
||||
try {
|
||||
visaCaseRepository.save(visaCase);
|
||||
} catch (Exception e1) {
|
||||
log.error("Save visa case error: {},Case # : {}", e1.getMessage(),
|
||||
visaCase.getRefCrawlCaseNumber());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
batchItems.clear();
|
||||
}
|
||||
}
|
||||
|
||||
public VisaCase saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
|
||||
VisaCase saveVisaCase = new VisaCase();
|
||||
if (StringUtils.hasText(visaCheckeeCrawlData.getCaseNum())) {
|
||||
Optional<VisaCase> visaCase =
|
||||
visaCaseRepository.findByRefCaseNum(visaCheckeeCrawlData.getCaseNum());
|
||||
if (visaCase.isPresent()) {
|
||||
saveVisaCase = visaCase.get();
|
||||
}
|
||||
}
|
||||
saveVisaCase.setUserName(visaCheckeeCrawlData.getUserId());
|
||||
saveVisaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
|
||||
saveVisaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
|
||||
saveVisaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
|
||||
saveVisaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
|
||||
try {
|
||||
saveVisaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
|
||||
} catch (Exception e) {
|
||||
log.error("Parse check date error: {}, error: {}", visaCheckeeCrawlData.getCheckDate(), e.getMessage());
|
||||
saveVisaCase.setVisaEntry(VisaEntry.New);
|
||||
}
|
||||
saveVisaCase.setMajor(visaCheckeeCrawlData.getMajor());
|
||||
saveVisaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
|
||||
saveVisaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
|
||||
saveVisaCase.setNote(visaCheckeeCrawlData.getNote());
|
||||
saveVisaCase.setRefCrawlCaseNumber(visaCheckeeCrawlData.getCaseNum());
|
||||
saveVisaCase.setCrawled(true);
|
||||
saveVisaCase.setCrawledTime(visaCheckeeCrawlData.getCrawlTime());
|
||||
return saveVisaCase;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -12,11 +12,8 @@ import com.northtecom.visatrack.api.data.repository.VisaCaseRepository;
|
|||
import com.northtecom.visatrack.api.data.repository.VisaCheckeeCrawlDataRepository;
|
||||
import com.northtecom.visatrack.api.data.spec.DateRange;
|
||||
import com.northtecom.visatrack.api.service.dto.*;
|
||||
import com.northtecom.visatrack.api.service.enums.VisaEntry;
|
||||
import com.northtecom.visatrack.api.service.enums.VisaStatus;
|
||||
import lombok.SneakyThrows;
|
||||
import com.northtecom.visatrack.api.service.rules.CrawlToOssRule;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.ObjectUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
@ -46,16 +43,19 @@ public class VisaReportCheckeeService {
|
|||
private final VisaCheckeeCrawlDataRepository visaCheckeeCrawlDataRepository;
|
||||
private final VisaCaseRepository visaCaseRepository;
|
||||
|
||||
private final VisaCaseService visaCaseService;
|
||||
|
||||
|
||||
/**
 * Creates the service with its collaborators (constructor injection).
 *
 * @param crawlService                   crawler used to fetch and parse Checkee pages
 * @param caseVisaReportRepository       repository for case visa reports
 * @param visaCheckeeCrawlDataRepository repository for crawled Checkee rows
 * @param visaCaseRepository             repository for visa cases
 * @param visaCaseService                service used to convert crawled rows into visa cases
 */
@Autowired
public VisaReportCheckeeService(CrawlService crawlService,
                                CaseVisaReportRepository caseVisaReportRepository,
                                VisaCheckeeCrawlDataRepository visaCheckeeCrawlDataRepository,
                                VisaCaseRepository visaCaseRepository, VisaCaseService visaCaseService) {
    this.crawlService = crawlService;
    this.caseVisaReportRepository = caseVisaReportRepository;
    this.visaCheckeeCrawlDataRepository = visaCheckeeCrawlDataRepository;
    this.visaCaseRepository = visaCaseRepository;
    this.visaCaseService = visaCaseService;
}
|
||||
|
||||
//Iterable 转 List
|
||||
|
@ -65,17 +65,6 @@ public class VisaReportCheckeeService {
|
|||
return list;
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public void crawlCheckeeReport() {
|
||||
CrawlHtml crawlContent = this.crawlService.crawlHtmlByCrawlUrl(CrawlService.URL_CHECKEE_REPORT);
|
||||
|
||||
if (ObjectUtils.isNotEmpty(crawlContent)) {
|
||||
List<VisaReportCheckeeData> reportCheckeeData =
|
||||
this.crawlService.parseCheckeeReport(crawlContent.getHtml());
|
||||
log.info("Crawl checkee report size: {}", reportCheckeeData.size());
|
||||
saveCheckeeReport(reportCheckeeData);
|
||||
}
|
||||
}
|
||||
|
||||
private void saveCheckeeReport(List<VisaReportCheckeeData> reportCheckeeData) {
|
||||
if (reportCheckeeData.size() == 0) {
|
||||
|
@ -142,7 +131,7 @@ public class VisaReportCheckeeService {
|
|||
|
||||
|
||||
CrawlHtml crawlCheckeeVisaList =
|
||||
this.crawlService.crawlHtmlByCrawlUrl(CrawlService.URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey());
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey());
|
||||
List<VisaCheckeeData> visaCheckeeCrawlCrawlDataList =
|
||||
this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList.getHtml());
|
||||
|
||||
|
@ -172,7 +161,7 @@ public class VisaReportCheckeeService {
|
|||
if (!hasPersonalDetailData && visaCrawlRequest.getCrawlDetail()) {
|
||||
try {
|
||||
CrawlHtml crawlCheckeeVisaDetail =
|
||||
this.crawlService.crawlHtmlByCrawlUrl(CrawlService.URL_CHECKEE_VISA_DETAIL + visaCheckeeData.getCaseNum());
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_VISA_DETAIL + visaCheckeeData.getCaseNum());
|
||||
this.crawlService.parseCheckeeVisaDetailDataAndFill(crawlCheckeeVisaDetail.getHtml(),
|
||||
visaCheckeeData);
|
||||
} catch (Exception e) {
|
||||
|
@ -182,7 +171,7 @@ public class VisaReportCheckeeService {
|
|||
|
||||
try {
|
||||
CrawlHtml crawlCheckeeVisaUpdate =
|
||||
this.crawlService.crawlHtmlByCrawlUrl(CrawlService.URL_CHECKEE_VISA_UPDATE + visaCheckeeData.getCaseNum());
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_VISA_UPDATE + visaCheckeeData.getCaseNum());
|
||||
this.crawlService.parseCheckeeVisaUpdateDataAndFill(crawlCheckeeVisaUpdate.getHtml(),
|
||||
visaCheckeeData);
|
||||
} catch (Exception e) {
|
||||
|
@ -203,6 +192,7 @@ public class VisaReportCheckeeService {
|
|||
}
|
||||
|
||||
log.info("Start to batch save crawl checkee visa data total: {}", allSavedVisaCaseList.size());
|
||||
|
||||
visaCheckeeCrawlDataRepository.saveAll(allSavedVisaCaseList);
|
||||
|
||||
}
|
||||
|
@ -281,65 +271,11 @@ public class VisaReportCheckeeService {
|
|||
return saveVisaCheckeeCrawlData;
|
||||
}
|
||||
|
||||
private void saveCheckeeVisa(VisaCheckeeData visaCheckeeData,
|
||||
List<VisaCheckeeCrawlData> visaReportCheckeeCrawlDataList, Date crawlTime,
|
||||
String month) {
|
||||
|
||||
VisaCheckeeCrawlData findCrawlData = visaReportCheckeeCrawlDataList.stream()
|
||||
.filter(visaCheckeeCrawlData -> visaCheckeeCrawlData.getCaseNum().equals(visaCheckeeData.getCaseNum()))
|
||||
.findFirst().orElse(null);
|
||||
|
||||
VisaCheckeeCrawlData saveVisaCheckeeCrawlData = new VisaCheckeeCrawlData();
|
||||
|
||||
if (findCrawlData == null) {
|
||||
Optional<VisaCheckeeCrawlData> visaCheckeeCrawlData =
|
||||
visaCheckeeCrawlDataRepository.findByCaseNum(visaCheckeeData.getCaseNum());
|
||||
|
||||
if (visaCheckeeCrawlData.isPresent()) {
|
||||
saveVisaCheckeeCrawlData = visaCheckeeCrawlData.get();
|
||||
}
|
||||
} else {
|
||||
saveVisaCheckeeCrawlData = findCrawlData;
|
||||
}
|
||||
|
||||
// 爬取时间1小时以内不做更新
|
||||
if (saveVisaCheckeeCrawlData.getCrawlTime() != null && DateUtil.offsetHour(saveVisaCheckeeCrawlData.getCrawlTime(), 1).after(crawlTime)) {
|
||||
return;
|
||||
}
|
||||
|
||||
saveVisaCheckeeCrawlData.setCrawlTime(crawlTime);
|
||||
saveVisaCheckeeCrawlData.setMonth(month);
|
||||
saveVisaCheckeeCrawlData.setCaseNum(visaCheckeeData.getCaseNum());
|
||||
saveVisaCheckeeCrawlData.setUserId(visaCheckeeData.getUserId());
|
||||
saveVisaCheckeeCrawlData.setVisaType(visaCheckeeData.getVisaType());
|
||||
saveVisaCheckeeCrawlData.setVisaEntry(visaCheckeeData.getVisaEntry());
|
||||
saveVisaCheckeeCrawlData.setConsulate(visaCheckeeData.getConsulate());
|
||||
saveVisaCheckeeCrawlData.setMajor(visaCheckeeData.getMajor());
|
||||
saveVisaCheckeeCrawlData.setStatus(visaCheckeeData.getStatus());
|
||||
saveVisaCheckeeCrawlData.setCheckDate(visaCheckeeData.getCheckDate());
|
||||
saveVisaCheckeeCrawlData.setCompleteDate(visaCheckeeData.getCompleteDate());
|
||||
saveVisaCheckeeCrawlData.setNote(visaCheckeeData.getNote());
|
||||
|
||||
saveVisaCheckeeCrawlData.setPartEmail(visaCheckeeData.getPartEmail());
|
||||
saveVisaCheckeeCrawlData.setFirstName(visaCheckeeData.getFirstName());
|
||||
saveVisaCheckeeCrawlData.setLastName(visaCheckeeData.getLastName());
|
||||
saveVisaCheckeeCrawlData.setUniversity(visaCheckeeData.getUniversity());
|
||||
saveVisaCheckeeCrawlData.setDegree(visaCheckeeData.getDegree());
|
||||
saveVisaCheckeeCrawlData.setJobTitle(visaCheckeeData.getJobTitle());
|
||||
saveVisaCheckeeCrawlData.setEmployer(visaCheckeeData.getEmployer());
|
||||
saveVisaCheckeeCrawlData.setYearsInUsa(visaCheckeeData.getYearsInUsa());
|
||||
saveVisaCheckeeCrawlData.setCountry(visaCheckeeData.getCountry());
|
||||
|
||||
|
||||
try {
|
||||
visaCheckeeCrawlDataRepository.saveAndFlush(saveVisaCheckeeCrawlData);
|
||||
} catch (Exception e) {
|
||||
log.error("Save checkee visa error: caseNum : {} error: {}", visaCheckeeData.getCaseNum(), e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void importCheckeeVisaCrawlData(LocalDate startDate, LocalDate endDate) {
|
||||
|
||||
log.info("Start to import checkee visa crawl data from {} to {}", startDate, endDate);
|
||||
|
||||
List<VisaCheckeeCrawlData> visaCheckeeCrawlDataList =
|
||||
visaCheckeeCrawlDataRepository.findAllNotImportData(startDate, endDate);
|
||||
|
||||
|
@ -349,7 +285,7 @@ public class VisaReportCheckeeService {
|
|||
log.info("Start to import checkee visa data: {}/{}", i + 1, visaCheckeeCrawlDataList.size());
|
||||
|
||||
try {
|
||||
batchItems.add(saveCheckeeVisaDataToVisa(visaCheckeeCrawlDataList.get(i)));
|
||||
batchItems.add(visaCaseService.saveCheckeeVisaDataToVisa(visaCheckeeCrawlDataList.get(i)));
|
||||
} catch (Exception e) {
|
||||
log.error("Save checkee visa data to visa error: {},case# {}", e.getMessage(),
|
||||
visaCheckeeCrawlDataList.get(i).getCaseNum());
|
||||
|
@ -385,41 +321,15 @@ public class VisaReportCheckeeService {
|
|||
}
|
||||
|
||||
|
||||
private VisaCase saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
|
||||
VisaCase saveVisaCase = new VisaCase();
|
||||
if (StringUtils.hasText(visaCheckeeCrawlData.getCaseNum())) {
|
||||
Optional<VisaCase> visaCase =
|
||||
visaCaseRepository.findByRefCaseNum(visaCheckeeCrawlData.getCaseNum());
|
||||
if (visaCase.isPresent()) {
|
||||
saveVisaCase = visaCase.get();
|
||||
}
|
||||
}
|
||||
saveVisaCase.setUserName(visaCheckeeCrawlData.getUserId());
|
||||
saveVisaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
|
||||
saveVisaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
|
||||
saveVisaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
|
||||
saveVisaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
|
||||
try {
|
||||
saveVisaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
|
||||
} catch (Exception e) {
|
||||
log.error("Parse check date error: {}, error: {}", visaCheckeeCrawlData.getCheckDate(), e.getMessage());
|
||||
saveVisaCase.setVisaEntry(VisaEntry.New);
|
||||
}
|
||||
saveVisaCase.setMajor(visaCheckeeCrawlData.getMajor());
|
||||
saveVisaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
|
||||
saveVisaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
|
||||
saveVisaCase.setNote(visaCheckeeCrawlData.getNote());
|
||||
saveVisaCase.setRefCrawlCaseNumber(visaCheckeeCrawlData.getCaseNum());
|
||||
saveVisaCase.setCrawled(true);
|
||||
saveVisaCase.setCrawledTime(visaCheckeeCrawlData.getCrawlTime());
|
||||
return saveVisaCase;
|
||||
}
|
||||
|
||||
public void crawlNotDetailData() {
|
||||
|
||||
List<VisaCheckeeCrawlData> notDetailData =
|
||||
this.visaCheckeeCrawlDataRepository.QueryNotDetail();
|
||||
|
||||
getCrawlDataDetail(notDetailData);
|
||||
}
|
||||
|
||||
private void getCrawlDataDetail(List<VisaCheckeeCrawlData> notDetailData) {
|
||||
log.info("Start to save crawl checkee not detail visa data total: {}", notDetailData.size());
|
||||
|
||||
for (int i = 0; i < notDetailData.size(); i++) {
|
||||
|
@ -429,7 +339,7 @@ public class VisaReportCheckeeService {
|
|||
|
||||
try {
|
||||
CrawlHtml crawlCheckeeVisaDetail =
|
||||
this.crawlService.crawlHtmlByCrawlUrl(CrawlService.URL_CHECKEE_VISA_DETAIL + visaCheckeeCrawlData.getCaseNum());
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_VISA_DETAIL + visaCheckeeCrawlData.getCaseNum());
|
||||
this.crawlService.parseCheckeeVisaDetailDataAndFill(crawlCheckeeVisaDetail.getHtml(),
|
||||
visaCheckeeCrawlData);
|
||||
} catch (Exception e) {
|
||||
|
@ -439,7 +349,7 @@ public class VisaReportCheckeeService {
|
|||
|
||||
try {
|
||||
CrawlHtml crawlCheckeeVisaUpdate =
|
||||
this.crawlService.crawlHtmlByCrawlUrl(CrawlService.URL_CHECKEE_VISA_UPDATE + visaCheckeeCrawlData.getCaseNum());
|
||||
this.crawlService.crawlHtml(CrawlToOssRule.URL_CHECKEE_VISA_UPDATE + visaCheckeeCrawlData.getCaseNum());
|
||||
this.crawlService.parseCheckeeVisaUpdateDataAndFill(crawlCheckeeVisaUpdate.getHtml(),
|
||||
visaCheckeeCrawlData);
|
||||
} catch (Exception e) {
|
||||
|
@ -524,4 +434,48 @@ public class VisaReportCheckeeService {
|
|||
|
||||
return caseStatusSummaryReportList;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 重建所有报表数据
|
||||
*/
|
||||
public void rebuildAllReport() {
|
||||
LocalDate endDate = LocalDate.now();
|
||||
LocalDate startDate = LocalDate.of(2018, 10, 1);
|
||||
log.info("Start to rebuild all report data from {} to {}", startDate, endDate);
|
||||
this.generateReport(startDate, endDate);
|
||||
}
|
||||
|
||||
public void syncVisaDataFromCheckee(LocalDate startDate, LocalDate endDate) {
|
||||
|
||||
log.info("Start to sync visa data from checkee from {} to {}", startDate, endDate);
|
||||
|
||||
DateRange dateRange = DateRange.of(startDate, endDate);
|
||||
|
||||
List<String> monthList = dateRange.getMonthKeys();
|
||||
|
||||
log.info("Start to sync visa data from checkee");
|
||||
|
||||
for (String monthKey : monthList) {
|
||||
log.info("sync [{}] data", monthKey);
|
||||
try {
|
||||
VisaCrawlRequest visaCrawlRequest = new VisaCrawlRequest();
|
||||
visaCrawlRequest.setDateKey(monthKey);
|
||||
visaCrawlRequest.setCrawlDetail(true);
|
||||
this.crawlCheckeeVisa(visaCrawlRequest);
|
||||
} catch (Exception e) {
|
||||
log.error("sync [{}] data error", monthKey, e);
|
||||
}
|
||||
}
|
||||
|
||||
log.info("End to sync visa data from checkee");
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void syncDataAndReport(LocalDate endDate, LocalDate startDate) {
|
||||
this.syncVisaDataFromCheckee(startDate, endDate);
|
||||
this.importCheckeeVisaCrawlData(startDate, endDate.plusDays(1));
|
||||
this.rebuildAllReport();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
package com.northtecom.visatrack.api.service.rules;
|
||||
|
||||
import cn.hutool.core.date.DateUtil;
|
||||
import lombok.Data;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
*
|
||||
* @Author: XieYang
|
||||
* @Date: 2022/10/31/9:29
|
||||
* @Description:
|
||||
*/
|
||||
@Data
|
||||
public class CrawlToOssRule {
|
||||
|
||||
public static final String URL_CHECKEE_REPORT = "https://www.checkee.info/index.php";
|
||||
public static final String URL_CHECKEE_REPORT_DETAIL = "https://www.checkee.info/main.php?dispdate=";
|
||||
public static final String URL_CHECKEE_VISA_DETAIL = "https://checkee.info/personal_detail.php?casenum=";
|
||||
public static final String URL_CHECKEE_VISA_UPDATE = "https://checkee.info/update.php?casenum=";
|
||||
|
||||
private String folder;
|
||||
private String bucket;
|
||||
private String fileName;
|
||||
private String url;
|
||||
private String ossPath;
|
||||
private Integer cacheDays;
|
||||
|
||||
private Date crawlDate;
|
||||
|
||||
public CrawlToOssRule(String bucket, String url) {
|
||||
this.crawlDate = new Date();
|
||||
this.bucket = bucket;
|
||||
this.url = url;
|
||||
if (this.url.startsWith(URL_CHECKEE_REPORT)) {
|
||||
this.folder = "index";
|
||||
this.fileName = "index.html";
|
||||
} else if (this.url.startsWith(URL_CHECKEE_REPORT_DETAIL)) {
|
||||
this.folder = "report/month";
|
||||
this.fileName = String.format("report_detail_%s.html",
|
||||
this.url.substring(URL_CHECKEE_REPORT_DETAIL.length()));
|
||||
} else if (this.url.startsWith(URL_CHECKEE_VISA_DETAIL)) {
|
||||
this.folder = "detail";
|
||||
this.fileName = String.format("visa_detail_%s.html", this.url.substring(URL_CHECKEE_VISA_DETAIL.length()));
|
||||
} else if (this.url.startsWith(URL_CHECKEE_VISA_UPDATE)) {
|
||||
this.folder = "update";
|
||||
this.fileName = String.format("visa_update_%s.html", this.url.substring(URL_CHECKEE_VISA_UPDATE.length()));
|
||||
}
|
||||
|
||||
this.cacheDays = caculateCrawlCacheDay(this.url);
|
||||
|
||||
this.ossPath = buildOssPath(this.bucket, this.folder, this.fileName);
|
||||
}
|
||||
|
||||
public static CrawlToOssRule of(String defaultBucketName, String crawlUrl) {
|
||||
return new CrawlToOssRule(defaultBucketName, crawlUrl);
|
||||
}
|
||||
|
||||
private String buildOssPath(String bucket, String folder, String fileName) {
|
||||
bucket = bucket.replace("/", "").replace("\\", "");
|
||||
folder = folder.replace("\\", "/");
|
||||
if (folder.startsWith("/")) {
|
||||
folder = folder.substring(1);
|
||||
}
|
||||
if (folder.endsWith("/")) {
|
||||
folder = folder.substring(0, folder.length() - 1);
|
||||
}
|
||||
fileName = fileName.replace("/", "").replace("\\", "");
|
||||
return String.format("%s/%s/%s", bucket, folder, fileName);
|
||||
}
|
||||
|
||||
private Integer caculateCrawlCacheDay(String crawlUrl) {
|
||||
if (crawlUrl.contains(URL_CHECKEE_REPORT)) {
|
||||
return 1;
|
||||
} else if (crawlUrl.contains(URL_CHECKEE_VISA_DETAIL) || crawlUrl.contains(URL_CHECKEE_VISA_UPDATE)) {
|
||||
return 365;
|
||||
} else if (crawlUrl.contains(URL_CHECKEE_REPORT_DETAIL)) {
|
||||
String month = crawlUrl.substring(crawlUrl.lastIndexOf("=") + 1);
|
||||
LocalDate localDate = LocalDate.parse(month + "-01");
|
||||
LocalDate now = LocalDate.now();
|
||||
int monthDiff =
|
||||
now.getYear() * 12 + now.getMonthValue() - (localDate.getYear() * 12 + localDate.getMonthValue());
|
||||
if (monthDiff <= 2) {
|
||||
return 1;
|
||||
} else if (monthDiff <= 6) {
|
||||
return 7;
|
||||
} else if (monthDiff <= 24) {
|
||||
return 30;
|
||||
} else if (monthDiff <= 72) {
|
||||
return 365;
|
||||
} else {
|
||||
return 3 * 365;
|
||||
}
|
||||
} else {
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean checkIsExpired(Date crawledDate) {
|
||||
return DateUtil.offsetDay(crawledDate, this.cacheDays).before(crawlDate);
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
server:
|
||||
ssl:
|
||||
key-store: classpath:keystore.p12
|
||||
key-store-password: 123456
|
||||
key-store-type: PKCS12
|
||||
key-alias: tomcat
|
||||
#server:
|
||||
# ssl:
|
||||
# key-store: classpath:keystore.p12
|
||||
# key-store-password: 123456
|
||||
# key-store-type: PKCS12
|
||||
# key-alias: tomcat
|
|
@ -31,7 +31,7 @@ spring:
|
|||
application:
|
||||
name: usvisatrack
|
||||
title: Us Visa Track API
|
||||
version: 1.1.7
|
||||
version: 1.1.8
|
||||
jackson:
|
||||
mapper:
|
||||
accept-case-insensitive-properties: true
|
||||
|
@ -57,7 +57,7 @@ spring:
|
|||
order_updates: true
|
||||
mvc:
|
||||
async:
|
||||
request-timeout: 30000
|
||||
request-timeout: 60000
|
||||
|
||||
thymeleaf:
|
||||
prefix: classpath:/templates/
|
||||
|
|
Loading…
Reference in New Issue