修复爬虫爬取数据问题
This commit is contained in:
parent
a2dfa58afb
commit
6d0b9ade62
|
@ -107,13 +107,19 @@ public class CrawlService {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Document doc = getDocument(crawlUrl);
|
Document doc = getDocument(crawlUrl);
|
||||||
|
if (!doc.select("title").text().equals("One moment, please...")) {
|
||||||
content = doc.html();
|
content = doc.html();
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error(e.getMessage());
|
log.error(e.getMessage());
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!StringUtils.isNotBlank(content)) {
|
||||||
SaveHtml(content, crawlUrl, crawlDate, crawlId);
|
SaveHtml(content, crawlUrl, crawlDate, crawlId);
|
||||||
|
}
|
||||||
|
|
||||||
|
Thread.sleep(2000);
|
||||||
|
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package com.northtecom.visatrack.api.service.impl;
|
package com.northtecom.visatrack.api.service.impl;
|
||||||
|
|
||||||
|
import com.northtecom.visatrack.api.base.exception.BaseException;
|
||||||
|
import com.northtecom.visatrack.api.base.web.Status;
|
||||||
import com.northtecom.visatrack.api.controller.vo.VisaCrawlRequest;
|
import com.northtecom.visatrack.api.controller.vo.VisaCrawlRequest;
|
||||||
import com.northtecom.visatrack.api.data.entity.CaseVisaReport;
|
import com.northtecom.visatrack.api.data.entity.CaseVisaReport;
|
||||||
import com.northtecom.visatrack.api.data.entity.VisaCase;
|
import com.northtecom.visatrack.api.data.entity.VisaCase;
|
||||||
|
@ -21,6 +23,7 @@ import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.time.LocalDate;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@ -130,6 +133,13 @@ public class VisaReportCheckeeService {
|
||||||
|
|
||||||
|
|
||||||
public void crawlCheckeeVisa(VisaCrawlRequest visaCrawlRequest) throws IOException, InterruptedException {
|
public void crawlCheckeeVisa(VisaCrawlRequest visaCrawlRequest) throws IOException, InterruptedException {
|
||||||
|
|
||||||
|
LocalDate crawlDate = LocalDate.parse(visaCrawlRequest.getDateKey() + "-01");
|
||||||
|
|
||||||
|
if (crawlDate.isAfter(LocalDate.now())) {
|
||||||
|
throw new BaseException(Status.BAD_REQUEST, "Crawl month is after current month");
|
||||||
|
}
|
||||||
|
|
||||||
String crawlCheckeeVisaList =
|
String crawlCheckeeVisaList =
|
||||||
this.crawlService.crawlHtmlByCrawlUrl(URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey(), 1);
|
this.crawlService.crawlHtmlByCrawlUrl(URL_CHECKEE_REPORT_DETAIL + visaCrawlRequest.getDateKey(), 1);
|
||||||
List<VisaCheckeeData> visaCheckeeCrawlDataList = this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList);
|
List<VisaCheckeeData> visaCheckeeCrawlDataList = this.crawlService.parseCheckeeVisa(crawlCheckeeVisaList);
|
||||||
|
@ -218,19 +228,26 @@ public class VisaReportCheckeeService {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
|
private void saveCheckeeVisaDataToVisa(VisaCheckeeCrawlData visaCheckeeCrawlData) {
|
||||||
VisaCase visaCase = new VisaCase();
|
VisaCase saveVisaCase = new VisaCase();
|
||||||
visaCase.setUserName(visaCheckeeCrawlData.getUserId());
|
if (visaCheckeeCrawlData.getSaveToVisaCaseId() != null) {
|
||||||
visaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
|
Optional<VisaCase> visaCase =
|
||||||
visaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
|
visaCaseRepository.findById(visaCheckeeCrawlData.getSaveToVisaCaseId());
|
||||||
visaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
|
if (visaCase.isPresent()) {
|
||||||
visaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
|
saveVisaCase = visaCase.get();
|
||||||
visaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
|
}
|
||||||
visaCase.setMajor(visaCheckeeCrawlData.getMajor());
|
}
|
||||||
visaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
|
saveVisaCase.setUserName(visaCheckeeCrawlData.getUserId());
|
||||||
visaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
|
saveVisaCase.setUserEmail(visaCheckeeCrawlData.getPartEmail());
|
||||||
visaCase.setNote(visaCheckeeCrawlData.getNote());
|
saveVisaCase.setVisaCategory(visaCheckeeCrawlData.getVisaType());
|
||||||
visaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId());
|
saveVisaCase.setVisaStatus(VisaStatus.valueOf(visaCheckeeCrawlData.getStatus()));
|
||||||
VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(visaCase);
|
saveVisaCase.setEmbassyConsulate(visaCheckeeCrawlData.getConsulate());
|
||||||
|
saveVisaCase.setVisaEntry(VisaEntry.valueOf(visaCheckeeCrawlData.getVisaEntry()));
|
||||||
|
saveVisaCase.setMajor(visaCheckeeCrawlData.getMajor());
|
||||||
|
saveVisaCase.setDateVisaInterview(visaCheckeeCrawlData.getCheckDate());
|
||||||
|
saveVisaCase.setDateVisaCheckCompleted(visaCheckeeCrawlData.getCompleteDate());
|
||||||
|
saveVisaCase.setNote(visaCheckeeCrawlData.getNote());
|
||||||
|
saveVisaCase.setRefCrawlDataId(visaCheckeeCrawlData.getId());
|
||||||
|
VisaCase savedVisaCase = visaCaseRepository.saveAndFlush(saveVisaCase);
|
||||||
visaCheckeeCrawlData.setSaveToVisaCaseId(savedVisaCase.getId());
|
visaCheckeeCrawlData.setSaveToVisaCaseId(savedVisaCase.getId());
|
||||||
visaCheckeeCrawlDataRepository.saveAndFlush(visaCheckeeCrawlData);
|
visaCheckeeCrawlDataRepository.saveAndFlush(visaCheckeeCrawlData);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue