修复爬虫爬取数据问题

This commit is contained in:
yang.xie 2022-10-13 07:07:14 +08:00
parent a2dfa58afb
commit 6d0b9ade62
2 changed files with 38 additions and 15 deletions

View File

@ -107,13 +107,19 @@ public class CrawlService {
try { try {
Document doc = getDocument(crawlUrl); Document doc = getDocument(crawlUrl);
if (!doc.select("title").text().equals("One moment, please...")) {
content = doc.html(); content = doc.html();
}
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage()); log.error(e.getMessage());
throw e; throw e;
} }
// Persist only when a page body was actually captured. `content` is assigned above only
// when the fetched document's title is not "One moment, please...", so a blank value here
// means there is nothing worth saving. The previous guard was inverted and saved only
// blank content.
if (StringUtils.isNotBlank(content)) {
    SaveHtml(content, crawlUrl, crawlDate, crawlId);
}
Thread.sleep(2000);
return content; return content;
} }

View File

@ -1,5 +1,7 @@
package com.northtecom.visatrack.api.service.impl; package com.northtecom.visatrack.api.service.impl;
import com.northtecom.visatrack.api.base.exception.BaseException;
import com.northtecom.visatrack.api.base.web.Status;
import com.northtecom.visatrack.api.controller.vo.VisaCrawlRequest; import com.northtecom.visatrack.api.controller.vo.VisaCrawlRequest;
import com.northtecom.visatrack.api.data.entity.CaseVisaReport; import com.northtecom.visatrack.api.data.entity.CaseVisaReport;
import com.northtecom.visatrack.api.data.entity.VisaCase; import com.northtecom.visatrack.api.data.entity.VisaCase;
@ -21,6 +23,7 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import java.io.IOException; import java.io.IOException;
import java.time.LocalDate;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
@ -130,6 +133,13 @@ public class VisaReportCheckeeService {
public void crawlCheckeeVisa(VisaCrawlRequest visaCrawlRequest) throws IOException, InterruptedException { public void crawlCheckeeVisa(VisaCrawlRequest visaCrawlRequest) throws IOException, InterruptedException {
LocalDate crawlDate = LocalDate.parse(visaCrawlRequest.getDateKey() + "-01");
if (crawlDate.isAfter(LocalDate.now())) {
throw new BaseException(Status.BAD_REQUEST, "Crawl month is after current month");
}
String crawlCheckeeVisaList = String crawlCheckeeVisaList =
this.crawlService.crawlHtmlByCrawlUrl(URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey(), 1); this.crawlService.crawlHtmlByCrawlUrl(URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey(), 1);
List<VisaCheckeeData> visaCheckeeCrawlDataList = this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList); List<VisaCheckeeData> visaCheckeeCrawlDataList = this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList);
@ -218,19 +228,26 @@ public class VisaReportCheckeeService {
} }
private void saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) { private void saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
VisaCase visaCase = new VisaCase(); VisaCase saveVisaCase = new VisaCase();
visaCase.setUserName(visaCheckeeCrawlData.getUserId()); if (visaCheckeeCrawlData.getSaveToVisaCaseId() != null) {
visaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail()); Optional<VisaCase> visaCase =
visaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType()); visaCaseRepository.findById(visaCheckeeCrawlData.getSaveToVisaCaseId());
visaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus())); if (visaCase.isPresent()) {
visaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate()); saveVisaCase = visaCase.get();
visaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry())); }
visaCase.setMajor(visaCheckeeCrawlData.getMajor()); }
visaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate()); saveVisaCase.setUserName(visaCheckeeCrawlData.getUserId());
visaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate()); saveVisaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
visaCase.setNote(visaCheckeeCrawlData.getNote()); saveVisaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
visaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId()); saveVisaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(visaCase); saveVisaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
saveVisaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
saveVisaCase.setMajor(visaCheckeeCrawlData.getMajor());
saveVisaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
saveVisaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
saveVisaCase.setNote(visaCheckeeCrawlData.getNote());
saveVisaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId());
VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(saveVisaCase);
visaCheckeeCrawlData.setSaveToVisaCaseId(savedVisaCase.getId()); visaCheckeeCrawlData.setSaveToVisaCaseId(savedVisaCase.getId());
visaCheckeeCrawlDataRepository.saveAndFlush(visaCheckeeCrawlData); visaCheckeeCrawlDataRepository.saveAndFlush(visaCheckeeCrawlData);
} }