修复爬虫爬取数据问题

This commit is contained in:
yang.xie 2022-10-13 07:07:14 +08:00
parent a2dfa58afb
commit 6d0b9ade62
2 changed files with 38 additions and 15 deletions

View File

@ -107,13 +107,19 @@ public class CrawlService {
try {
Document doc = getDocument(crawlUrl);
if (!doc.select("title").text().equals("One moment, please...")) {
content = doc.html();
}
} catch (Exception e) {
log.error(e.getMessage());
throw e;
}
if (!StringUtils.isNotBlank(content)) {
SaveHtml(content, crawlUrl, crawlDate, crawlId);
}
Thread.sleep(2000);
return content;
}

View File

@ -1,5 +1,7 @@
package com.northtecom.visatrack.api.service.impl;
import com.northtecom.visatrack.api.base.exception.BaseException;
import com.northtecom.visatrack.api.base.web.Status;
import com.northtecom.visatrack.api.controller.vo.VisaCrawlRequest;
import com.northtecom.visatrack.api.data.entity.CaseVisaReport;
import com.northtecom.visatrack.api.data.entity.VisaCase;
@ -21,6 +23,7 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.time.LocalDate;
import java.util.List;
import java.util.Optional;
@ -130,6 +133,13 @@ public class VisaReportCheckeeService {
public void crawlCheckeeVisa(VisaCrawlRequest visaCrawlRequest) throws IOException, InterruptedException {
LocalDate crawlDate = LocalDate.parse(visaCrawlRequest.getDateKey() + "-01");
if (crawlDate.isAfter(LocalDate.now())) {
throw new BaseException(Status.BAD_REQUEST, "Crawl month is after current month");
}
String crawlCheckeeVisaList =
this.crawlService.crawlHtmlByCrawlUrl(URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey(), 1);
List<VisaCheckeeData> visaCheckeeCrawlDataList = this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList);
@ -218,19 +228,26 @@ public class VisaReportCheckeeService {
}
/**
 * Copies one crawled Checkee record into a {@link VisaCase} and links the two rows.
 *
 * <p>If the crawl record already carries a {@code saveToVisaCaseId} and that case can be
 * loaded from {@code visaCaseRepository}, the existing case is updated in place; otherwise a
 * new {@link VisaCase} is created. This keeps a re-crawl of the same record from inserting a
 * second case for it.
 *
 * <p>After the case is saved, its id is written back to the crawl record and the crawl record
 * itself is saved, so the next run can find the same case again.
 *
 * @param visaCheckeeCrawlData crawled record to persist; its {@code saveToVisaCaseId} is
 *                             overwritten with the id of the saved case
 */
private void saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
    // Reuse the previously linked case when there is one; fall back to a fresh entity when the
    // link is missing or points at a row that can no longer be found.
    VisaCase saveVisaCase = null;
    if (visaCheckeeCrawlData.getSaveToVisaCaseId() != null) {
        saveVisaCase = visaCaseRepository
                .findById(visaCheckeeCrawlData.getSaveToVisaCaseId())
                .orElseGet(VisaCase::new);
    }
    if (saveVisaCase == null) {
        saveVisaCase = new VisaCase();
    }

    saveVisaCase.setUserName(visaCheckeeCrawlData.getUserId());
    saveVisaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
    saveVisaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
    // NOTE(review): VisaStatus/VisaEntry look like enums; if so, valueOf throws
    // IllegalArgumentException for an unknown name and NullPointerException for null — confirm
    // that the crawled values are always valid constant names.
    saveVisaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
    saveVisaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
    saveVisaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
    saveVisaCase.setMajor(visaCheckeeCrawlData.getMajor());
    saveVisaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
    saveVisaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
    saveVisaCase.setNote(visaCheckeeCrawlData.getNote());
    saveVisaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId());

    VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(saveVisaCase);

    // Record the link on the crawl row so a later crawl updates this case instead of adding one.
    visaCheckeeCrawlData.setSaveToVisaCaseId(savedVisaCase.getId());
    visaCheckeeCrawlDataRepository.saveAndFlush(visaCheckeeCrawlData);
}