修复爬虫爬取数据问题
This commit is contained in:
parent
a2dfa58afb
commit
6d0b9ade62
|
@ -107,13 +107,19 @@ public class CrawlService {
|
|||
|
||||
try {
|
||||
Document doc = getDocument(crawlUrl);
|
||||
content = doc.html();
|
||||
if (!doc.select("title").text().equals("One moment, please...")) {
|
||||
content = doc.html();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
throw e;
|
||||
}
|
||||
|
||||
SaveHtml(content, crawlUrl, crawlDate, crawlId);
|
||||
if (!StringUtils.isNotBlank(content)) {
|
||||
SaveHtml(content, crawlUrl, crawlDate, crawlId);
|
||||
}
|
||||
|
||||
Thread.sleep(2000);
|
||||
|
||||
return content;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package com.northtecom.visatrack.api.service.impl;
|
||||
|
||||
import com.northtecom.visatrack.api.base.exception.BaseException;
|
||||
import com.northtecom.visatrack.api.base.web.Status;
|
||||
import com.northtecom.visatrack.api.controller.vo.VisaCrawlRequest;
|
||||
import com.northtecom.visatrack.api.data.entity.CaseVisaReport;
|
||||
import com.northtecom.visatrack.api.data.entity.VisaCase;
|
||||
|
@ -21,6 +23,7 @@ import org.springframework.stereotype.Service;
|
|||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
|
@ -130,6 +133,13 @@ public class VisaReportCheckeeService {
|
|||
|
||||
|
||||
public void crawlCheckeeVisa(VisaCrawlRequest visaCrawlRequest) throws IOException, InterruptedException {
|
||||
|
||||
LocalDate crawlDate = LocalDate.parse(visaCrawlRequest.getDateKey() + "-01");
|
||||
|
||||
if (crawlDate.isAfter(LocalDate.now())) {
|
||||
throw new BaseException(Status.BAD_REQUEST, "Crawl month is after current month");
|
||||
}
|
||||
|
||||
String crawlCheckeeVisaList =
|
||||
this.crawlService.crawlHtmlByCrawlUrl(URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey(), 1);
|
||||
List<VisaCheckeeData> visaCheckeeCrawlDataList = this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList);
|
||||
|
@ -218,19 +228,26 @@ public class VisaReportCheckeeService {
|
|||
}
|
||||
|
||||
private void saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
|
||||
VisaCase visaCase = new VisaCase();
|
||||
visaCase.setUserName(visaCheckeeCrawlData.getUserId());
|
||||
visaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
|
||||
visaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
|
||||
visaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
|
||||
visaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
|
||||
visaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
|
||||
visaCase.setMajor(visaCheckeeCrawlData.getMajor());
|
||||
visaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
|
||||
visaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
|
||||
visaCase.setNote(visaCheckeeCrawlData.getNote());
|
||||
visaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId());
|
||||
VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(visaCase);
|
||||
VisaCase saveVisaCase = new VisaCase();
|
||||
if (visaCheckeeCrawlData.getSaveToVisaCaseId() != null) {
|
||||
Optional<VisaCase> visaCase =
|
||||
visaCaseRepository.findById(visaCheckeeCrawlData.getSaveToVisaCaseId());
|
||||
if (visaCase.isPresent()) {
|
||||
saveVisaCase = visaCase.get();
|
||||
}
|
||||
}
|
||||
saveVisaCase.setUserName(visaCheckeeCrawlData.getUserId());
|
||||
saveVisaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
|
||||
saveVisaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
|
||||
saveVisaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
|
||||
saveVisaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
|
||||
saveVisaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
|
||||
saveVisaCase.setMajor(visaCheckeeCrawlData.getMajor());
|
||||
saveVisaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
|
||||
saveVisaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
|
||||
saveVisaCase.setNote(visaCheckeeCrawlData.getNote());
|
||||
saveVisaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId());
|
||||
VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(saveVisaCase);
|
||||
visaCheckeeCrawlData.setSaveToVisaCaseId(savedVisaCase.getId());
|
||||
visaCheckeeCrawlDataRepository.saveAndFlush(visaCheckeeCrawlData);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue