数据导入自动运行程序,部署在服务器上,每周运行一次,用于刷新数据
This commit is contained in:
parent
1180a3ee6f
commit
2d416b95c4
@ -1,14 +1,15 @@
|
||||
package com.usvisatrack.services;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.CommandLineParser;
|
||||
import org.apache.commons.cli.GnuParser;
|
||||
import org.apache.commons.cli.DefaultParser;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.jsoup.Jsoup;
|
||||
@ -20,10 +21,12 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.usvisatrack.core.common.data.VisaEntry;
|
||||
import com.usvisatrack.core.common.data.VisaStatus;
|
||||
import com.usvisatrack.core.dao.model.USEmbassy;
|
||||
import com.usvisatrack.core.dao.model.CheckeeVisa;
|
||||
import com.usvisatrack.core.dao.model.User;
|
||||
import com.usvisatrack.core.dao.model.Visa;
|
||||
import com.usvisatrack.core.dao.model.VisaClass;
|
||||
import com.usvisatrack.core.factories.USFactory;
|
||||
import com.usvisatrack.core.factories.UserFactory;
|
||||
import com.usvisatrack.core.factories.VisaFactory;
|
||||
import com.usvisatrack.services.common.DataCrawl;
|
||||
|
||||
@ -36,9 +39,7 @@ import com.usvisatrack.services.common.DataCrawl;
|
||||
public class VisaImporter extends DataCrawl {
|
||||
private static final Logger logger = LoggerFactory.getLogger(VisaImporter.class);
|
||||
|
||||
public final static String ITEM_FTP_FOLDER = "/home/ftp/com-bcodepot/item";
|
||||
public final static String ITEM_PROCESSED_FOLDER = "/home/data/origin/com-bcodepot-www/processed/item";
|
||||
public final static String APIDATA_CDN_FOLDER = "/home/cdn/com-bcodepot-www/data/apidata";
|
||||
public final static String URL_CHECKEE = "https://www.checkee.info/main.php?dispdate=";
|
||||
|
||||
public static HashMap<String, VisaClass> visaClassMap = new HashMap<String, VisaClass>();
|
||||
|
||||
@ -50,7 +51,7 @@ public class VisaImporter extends DataCrawl {
|
||||
options.addOption("l", true, "Clean all agents input information");
|
||||
options.addOption("h", true, "Clean one agent by input email address");
|
||||
|
||||
CommandLineParser parser = new GnuParser();
|
||||
CommandLineParser parser = new DefaultParser();
|
||||
|
||||
// parse command line
|
||||
try {
|
||||
@ -72,43 +73,70 @@ public class VisaImporter extends DataCrawl {
|
||||
/**
 * Entry point of the importer: initialises data via {@code initData()} and then
 * crawls Checkee listing pages.
 * <p>
 * The parameterised calls build their URL from {@code URL_CHECKEE} plus a
 * {@code yyyy-MM} month string, first for the current month and then for the
 * previous month.
 */
@Override
|
||||
public void run() {
|
||||
initData(); // Init data from database
|
||||
// NOTE(review): this no-argument call and the two parameterised calls below look like
// the removed and added sides of the same diff hunk - confirm which ones are live.
crawlWebVisa();
|
||||
|
||||
// Current month, formatted as yyyy-MM and appended to the Checkee listing URL.
crawlWebVisa(URL_CHECKEE + DateTimeFormat.forPattern("yyyy-MM").print(new DateTime()));
|
||||
// Previous month, same format.
crawlWebVisa(URL_CHECKEE + DateTimeFormat.forPattern("yyyy-MM").print(new DateTime().minusMonths(1)));
|
||||
}
|
||||
|
||||
/**
|
||||
* crawlWebItem by parse web page
|
||||
*/
|
||||
private void crawlWebVisa() {
|
||||
private void crawlWebVisa(String cURL) {
|
||||
logger.error("Crawl Web Data to load item info.");
|
||||
List<Visa> visaList = new ArrayList<Visa>();
|
||||
|
||||
boolean isNewVisa = false;
|
||||
|
||||
Document doc = null;
|
||||
|
||||
try {
|
||||
doc = Jsoup.connect("https://www.checkee.info/main.php?dispdate=2017-07").get();
|
||||
doc = Jsoup.connect(cURL).get();
|
||||
Elements newsHeadlines = doc.select("table");
|
||||
Element table = newsHeadlines.get(6);
|
||||
|
||||
int i = 0;
|
||||
// int i = 0;
|
||||
|
||||
for (Element row : table.select("tr")) {
|
||||
i++;
|
||||
// i++;
|
||||
Elements tds = row.select("td");
|
||||
|
||||
if (!StringUtils.equalsIgnoreCase("ID", tds.get(1).text())) {
|
||||
Visa visa = new Visa();
|
||||
String checkeeCaseNumber = getCheckeeCaseNumber(tds.get(0));
|
||||
|
||||
Visa visa = VisaFactory.getVisaFromCheckee(NumberUtils.toLong(checkeeCaseNumber));
|
||||
|
||||
if (visa == null) {
|
||||
visa = new Visa();
|
||||
isNewVisa = true;
|
||||
}
|
||||
|
||||
// SET VISA CLASS
|
||||
String visaClassName = StringUtils.trimToEmpty(tds.get(2).text());
|
||||
if (StringUtils.isNotBlank(visaClassName)) {
|
||||
switch (visaClassName) {
|
||||
case "B1":
|
||||
visaClassName = "B-1";
|
||||
break;
|
||||
case "B2":
|
||||
visaClassName = "B-2";
|
||||
break;
|
||||
case "H1":
|
||||
visaClassName = "H1-B";
|
||||
break;
|
||||
case "H4":
|
||||
visaClassName = "H-4";
|
||||
break;
|
||||
case "F1":
|
||||
visaClassName = "F-1";
|
||||
break;
|
||||
case "F2":
|
||||
visaClassName = "F-2";
|
||||
break;
|
||||
case "J1":
|
||||
visaClassName = "J-1";
|
||||
break;
|
||||
case "L1":
|
||||
visaClassName = "L-1";
|
||||
break;
|
||||
}
|
||||
|
||||
visa.setVisaClass(visaClassMap.get(visaClassName));
|
||||
@ -132,11 +160,26 @@ public class VisaImporter extends DataCrawl {
|
||||
String usEmbassyName = StringUtils.upperCase(StringUtils.trimToEmpty(tds.get(4).text()));
|
||||
if (StringUtils.isNotBlank(usEmbassyName)) {
|
||||
switch (usEmbassyName) {
|
||||
case "BEIJING":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "BeiJing"));
|
||||
break;
|
||||
case "CHENGDU":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Chengdu"));
|
||||
break;
|
||||
case "GUANGZHOU":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Guangzhou"));
|
||||
break;
|
||||
case "RENEWAL":
|
||||
visa.setVisaEntry(VisaEntry.RENEWAL);
|
||||
case "SHANGHAI":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Shanghai"));
|
||||
break;
|
||||
case "SHENYANG":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Shenyang"));
|
||||
break;
|
||||
case "WUHAN":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Wuhan"));
|
||||
break;
|
||||
case "HONGKONG":
|
||||
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "HongKong"));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -166,21 +209,25 @@ public class VisaImporter extends DataCrawl {
|
||||
if (StringUtils.isNotBlank(dateVisaInterview)) {
|
||||
visa.setDateVisaInterview(DateTimeFormat.forPattern("yyyy-MM-dd").parseDateTime(dateVisaInterview).toDate());
|
||||
}
|
||||
if (StringUtils.isNotBlank(dateVisaIssued)) {
|
||||
if (StringUtils.isNotBlank(dateVisaIssued) && !StringUtils.equals(dateVisaIssued, "0000-00-00")) {
|
||||
visa.setDateVisaIssued(DateTimeFormat.forPattern("yyyy-MM-dd").parseDateTime(dateVisaIssued).toDate());
|
||||
}
|
||||
|
||||
Element link = tds.get(10).select("a").first();
|
||||
|
||||
logger.debug(">>>>>>>>>>>>>>>[{}]", link.attr("href"));
|
||||
|
||||
updateVisaNote(visa, checkeeCaseNumber);
|
||||
|
||||
visa.setModifyDate(new Date());
|
||||
visa.setUser(getUser(checkeeCaseNumber));
|
||||
VisaFactory.save(visa);
|
||||
|
||||
if (isNewVisa) {
|
||||
CheckeeVisa checkeeVisa = new CheckeeVisa();
|
||||
checkeeVisa.setId(NumberUtils.toLong(checkeeCaseNumber));
|
||||
checkeeVisa.setVisaID(visa.getId());
|
||||
VisaFactory.save(checkeeVisa);
|
||||
}
|
||||
}
|
||||
|
||||
if (i == 2)
|
||||
break;
|
||||
// if (i == 2)
|
||||
// break;
|
||||
|
||||
}
|
||||
|
||||
@ -213,13 +260,48 @@ public class VisaImporter extends DataCrawl {
|
||||
*/
|
||||
private String getCheckeeCaseNumber(Element element) {
|
||||
String checkeeCaseNumber = null;
|
||||
|
||||
Element link = element.select("a").first();
|
||||
checkeeCaseNumber = StringUtils.substringAfterLast(link.attr("href"), "casenum=");
|
||||
|
||||
return checkeeCaseNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param userName
|
||||
* @return
|
||||
*/
|
||||
private User getUser(String checkeeCaseNumber) {
|
||||
String userName = null;
|
||||
String userEmail = null;
|
||||
|
||||
User user = null;
|
||||
Document doc = null;
|
||||
|
||||
try {
|
||||
doc = Jsoup.connect("https://www.checkee.info/update.php?casenum=" + checkeeCaseNumber).get();
|
||||
Elements elements = doc.select("input[name=email_dis]");
|
||||
userEmail = elements.first().val();
|
||||
|
||||
elements = doc.select("b");
|
||||
userName = StringUtils.trim(StringUtils.substringAfter(elements.get(7).text(), "ID:"));
|
||||
|
||||
user = UserFactory.get(userName);
|
||||
if (user == null) {
|
||||
user = new User();
|
||||
user.setUserName(userName);
|
||||
user.setEmail(userEmail);
|
||||
UserFactory.save(user);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
// TODO: handle exception
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return user;
|
||||
}
|
||||
|
||||
private void updateVisaNote(Visa visa, String checkeeCaseNumber) {
|
||||
Document doc = null;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user