数据导入自动运行程序,部署在服务器上,每周运行一次,用于刷新数据

This commit is contained in:
YuCheng Hu 2017-07-25 14:51:16 -04:00
parent 1180a3ee6f
commit 2d416b95c4

View File

@ -1,14 +1,15 @@
package com.usvisatrack.services;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.jsoup.Jsoup;
@ -20,10 +21,12 @@ import org.slf4j.LoggerFactory;
import com.usvisatrack.core.common.data.VisaEntry;
import com.usvisatrack.core.common.data.VisaStatus;
import com.usvisatrack.core.dao.model.USEmbassy;
import com.usvisatrack.core.dao.model.CheckeeVisa;
import com.usvisatrack.core.dao.model.User;
import com.usvisatrack.core.dao.model.Visa;
import com.usvisatrack.core.dao.model.VisaClass;
import com.usvisatrack.core.factories.USFactory;
import com.usvisatrack.core.factories.UserFactory;
import com.usvisatrack.core.factories.VisaFactory;
import com.usvisatrack.services.common.DataCrawl;
@ -36,9 +39,7 @@ import com.usvisatrack.services.common.DataCrawl;
public class VisaImporter extends DataCrawl {
private static final Logger logger = LoggerFactory.getLogger(VisaImporter.class);
public final static String ITEM_FTP_FOLDER = "/home/ftp/com-bcodepot/item";
public final static String ITEM_PROCESSED_FOLDER = "/home/data/origin/com-bcodepot-www/processed/item";
public final static String APIDATA_CDN_FOLDER = "/home/cdn/com-bcodepot-www/data/apidata";
public final static String URL_CHECKEE = "https://www.checkee.info/main.php?dispdate=";
public static HashMap<String, VisaClass> visaClassMap = new HashMap<String, VisaClass>();
@ -50,7 +51,7 @@ public class VisaImporter extends DataCrawl {
options.addOption("l", true, "Clean all agents input information");
options.addOption("h", true, "Clean one agent by input email address");
CommandLineParser parser = new GnuParser();
CommandLineParser parser = new DefaultParser();
// parse command line
try {
@ -72,43 +73,70 @@ public class VisaImporter extends DataCrawl {
/**
 * Runs one import pass: initialises reference data via {@code initData()} and then
 * invokes {@code crawlWebVisa} for the checkee listing pages.
 */
@Override
public void run() {
initData(); // Init data from database
crawlWebVisa();
// Listing page for the current month; the URL suffix is the month formatted as yyyy-MM.
crawlWebVisa(URL_CHECKEE + DateTimeFormat.forPattern("yyyy-MM").print(new DateTime()));
// Listing page for the previous month, built the same way from "now minus one month".
crawlWebVisa(URL_CHECKEE + DateTimeFormat.forPattern("yyyy-MM").print(new DateTime().minusMonths(1)));
}
/**
* Crawls visa records by parsing the listing web page.
*/
private void crawlWebVisa() {
private void crawlWebVisa(String cURL) {
logger.error("Crawl Web Data to load item info.");
List<Visa> visaList = new ArrayList<Visa>();
boolean isNewVisa = false;
Document doc = null;
try {
doc = Jsoup.connect("https://www.checkee.info/main.php?dispdate=2017-07").get();
doc = Jsoup.connect(cURL).get();
Elements newsHeadlines = doc.select("table");
Element table = newsHeadlines.get(6);
int i = 0;
// int i = 0;
for (Element row : table.select("tr")) {
i++;
// i++;
Elements tds = row.select("td");
if (!StringUtils.equalsIgnoreCase("ID", tds.get(1).text())) {
Visa visa = new Visa();
String checkeeCaseNumber = getCheckeeCaseNumber(tds.get(0));
Visa visa = VisaFactory.getVisaFromCheckee(NumberUtils.toLong(checkeeCaseNumber));
if (visa == null) {
visa = new Visa();
isNewVisa = true;
}
// SET VISA CLASS
String visaClassName = StringUtils.trimToEmpty(tds.get(2).text());
if (StringUtils.isNotBlank(visaClassName)) {
switch (visaClassName) {
case "B1":
visaClassName = "B-1";
break;
case "B2":
visaClassName = "B-2";
break;
case "H1":
visaClassName = "H1-B";
break;
case "H4":
visaClassName = "H-4";
break;
case "F1":
visaClassName = "F-1";
break;
case "F2":
visaClassName = "F-2";
break;
case "J1":
visaClassName = "J-1";
break;
case "L1":
visaClassName = "L-1";
break;
}
visa.setVisaClass(visaClassMap.get(visaClassName));
@ -132,11 +160,26 @@ public class VisaImporter extends DataCrawl {
String usEmbassyName = StringUtils.upperCase(StringUtils.trimToEmpty(tds.get(4).text()));
if (StringUtils.isNotBlank(usEmbassyName)) {
switch (usEmbassyName) {
case "BEIJING":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "BeiJing"));
break;
case "CHENGDU":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Chengdu"));
break;
case "GUANGZHOU":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Guangzhou"));
break;
case "RENEWAL":
visa.setVisaEntry(VisaEntry.RENEWAL);
case "SHANGHAI":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Shanghai"));
break;
case "SHENYANG":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Shenyang"));
break;
case "WUHAN":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "Wuhan"));
break;
case "HONGKONG":
visa.setUsEmbassy(USFactory.searchUSEmbassy("China", "HongKong"));
break;
}
@ -166,21 +209,25 @@ public class VisaImporter extends DataCrawl {
if (StringUtils.isNotBlank(dateVisaInterview)) {
visa.setDateVisaInterview(DateTimeFormat.forPattern("yyyy-MM-dd").parseDateTime(dateVisaInterview).toDate());
}
if (StringUtils.isNotBlank(dateVisaIssued)) {
if (StringUtils.isNotBlank(dateVisaIssued) && !StringUtils.equals(dateVisaIssued, "0000-00-00")) {
visa.setDateVisaIssued(DateTimeFormat.forPattern("yyyy-MM-dd").parseDateTime(dateVisaIssued).toDate());
}
Element link = tds.get(10).select("a").first();
logger.debug(">>>>>>>>>>>>>>>[{}]", link.attr("href"));
updateVisaNote(visa, checkeeCaseNumber);
visa.setModifyDate(new Date());
visa.setUser(getUser(checkeeCaseNumber));
VisaFactory.save(visa);
if (isNewVisa) {
CheckeeVisa checkeeVisa = new CheckeeVisa();
checkeeVisa.setId(NumberUtils.toLong(checkeeCaseNumber));
checkeeVisa.setVisaID(visa.getId());
VisaFactory.save(checkeeVisa);
}
}
if (i == 2)
break;
// if (i == 2)
// break;
}
@ -213,13 +260,48 @@ public class VisaImporter extends DataCrawl {
*/
private String getCheckeeCaseNumber(Element element) {
    // The case number is the text after the last "casenum=" in the href of the
    // first anchor found inside the given element.
    String href = element.select("a").first().attr("href");
    return StringUtils.substringAfterLast(href, "casenum=");
}
/**
 * Resolves the {@link User} that owns a checkee case, creating and saving the user
 * when no user with the crawled name exists yet.
 *
 * <p>Fetches the checkee update page for the case, reads the e-mail from the
 * {@code email_dis} input and the user name from the text following {@code "ID:"}
 * in the eighth {@code <b>} element, then looks the user up through {@code UserFactory}.
 *
 * @param checkeeCaseNumber case number appended as the {@code casenum} query parameter
 * @return the existing or newly saved user; {@code null} when the page could not be
 *         fetched or parsed, or when it carried no user name
 */
private User getUser(String checkeeCaseNumber) {
    User user = null;

    try {
        Document doc = Jsoup.connect("https://www.checkee.info/update.php?casenum=" + checkeeCaseNumber).get();

        String userEmail = doc.select("input[name=email_dis]").first().val();
        String userName = StringUtils.trim(StringUtils.substringAfter(doc.select("b").get(7).text(), "ID:"));

        // substringAfter yields "" when the "ID:" marker is missing. Without this guard
        // a nameless user would be looked up and, if absent, created and saved.
        if (StringUtils.isBlank(userName)) {
            return null;
        }

        user = UserFactory.get(userName);
        if (user == null) {
            user = new User();
            user.setUserName(userName);
            user.setEmail(userEmail);
            UserFactory.save(user);
        }
    } catch (Exception e) {
        // Best effort: any fetch/parse/save failure is reported and the caller receives
        // whatever was resolved so far (null unless the lookup already succeeded).
        // NOTE(review): consider routing this through the class logger instead of stderr.
        e.printStackTrace();
    }

    return user;
}
private void updateVisaNote(Visa visa, String checkeeCaseNumber) {
Document doc = null;