78 lines
2.5 KiB
Python
78 lines
2.5 KiB
Python
|
import time
|
|||
|
import json
|
|||
|
import stomp
|
|||
|
from selenium.webdriver import Chrome
|
|||
|
from selenium.webdriver.chrome.service import Service
|
|||
|
from selenium.webdriver.common.by import By
|
|||
|
from selenium.webdriver.support import expected_conditions as EC
|
|||
|
from selenium.webdriver.support.ui import WebDriverWait
|
|||
|
|
|||
|
# Message-broker connection settings used when publishing scraped policies.
# Host and port of the broker endpoint the STOMP client connects to.
AMQHOST, AMQPORT = "mq.ossez.com", 61616
# Login and passcode presented to the broker.
AMQUSER, AMQPASS = "artemis", "artemis"
# Destination name that every message is sent to.
TOPICNAME = "policyQueue"
|
|||
|
|
|||
|
# Year range for the query: the earliest year is typed in by the operator and
# kept as the raw string returned by input(); the latest year is the current
# local calendar year.
begin_year = input("请输入最早年份:")
current_timestamp = time.time()
time_tuple = time.localtime(current_timestamp)
end_year = time_tuple.tm_year

# Pause length between actions (presumably seconds -- confirm at the use site).
wait_time = 0.5
# Number of pixels to move per mouse-scroll step.
action_pixel = 100
|
|||
|
|
|||
|
# Start Chrome through the locally installed chromedriver and open the site.
_CHROMEDRIVER_PATH = r"C:\Users\yhu\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
_ROWS_XPATH = '/html/body/div[3]/div/div/div/div[4]/div/table/tbody/tr'

chrome = Chrome(service=Service(_CHROMEDRIVER_PATH))
chrome.get('https://www.isharkfly.com/')

# Shared explicit wait with a 10-second timeout.
wait = WebDriverWait(chrome, timeout=10)

# Handle of the listing window, so focus can be returned to it later.
main_window = chrome.current_window_handle

# Table rows of the listing, collected as soon as the page has loaded.
rowContent = chrome.find_elements(By.XPATH, _ROWS_XPATH)
|
|||
|
|
|||
|
|
|||
|
def _summary_value(line):
    """Return the text after the first colon in *line*, stripped of whitespace.

    Both the ASCII colon (':') and the full-width colon (':') count as the
    separator. Only the first one splits the line, so a value that itself
    contains colons is returned whole. Returns None when the line has no
    colon at all.
    """
    hits = [pos for pos in (line.find(':'), line.find(':')) if pos != -1]
    if not hits:
        return None
    return line[min(hits) + 1:].strip()


def send_mq(ctxInfo, ctx):
    """Publish one policy document to the message broker as a JSON message.

    Args:
        ctxInfo: Multi-line summary text. The line starting with '索引号'
            supplies the index number and the line starting with '名称'
            supplies the title; all other lines are ignored.
        ctx: Body text of the policy, sent unchanged.

    The message body is a JSON object with the keys ``policy_index_number``,
    ``policy_title`` and ``policy_content``. A field whose summary line is
    missing (or has no colon) is sent as JSON ``null`` instead of raising.
    """
    index_string = None
    policy_name = None
    for summary in ctxInfo.splitlines():
        if summary.startswith('索引号'):
            index_string = _summary_value(summary)
        elif summary.startswith('名称'):
            policy_name = _summary_value(summary)
    # The '发文日期' line is intentionally not parsed: its value was never
    # part of the payload.

    data = {
        'policy_index_number': index_string,
        'policy_title': policy_name,
        'policy_content': ctx,
    }

    conn = stomp.Connection(
        host_and_ports=[(AMQHOST, AMQPORT)],
        # No automatic content-length header is added to outgoing frames.
        auto_content_length=False,
    )
    conn.connect(username=AMQUSER, passcode=AMQPASS, wait=True)
    try:
        conn.send(body=json.dumps(data), destination=TOPICNAME)
        # Short pause before disconnecting, presumably so the frame is
        # flushed to the broker first -- TODO confirm it is still needed.
        time.sleep(1)
    finally:
        # Always release the broker connection, even if the send failed.
        conn.disconnect()
|
|||
|
|
|||
|
|
|||
|
# Walk every listing row: open the linked document (it opens in a second
# window), scrape its summary and body, publish them, then return to the
# listing. The browser is always shut down, even when a row fails.
try:
    for tr in rowContent:
        tdList = tr.find_elements(By.XPATH, 'td')
        if len(tdList) < 2:
            # Header or spacer rows have no document cell; nothing to open.
            continue

        docName = tdList[1].text
        tdList[1].find_element(By.TAG_NAME, 'a').click()
        wait.until(EC.number_of_windows_to_be(2))

        # Switch to the first window handle that is not the listing window.
        for window_handle in chrome.window_handles:
            if window_handle != main_window:
                chrome.switch_to.window(window_handle)
                break

        try:
            ctxInfo = chrome.find_element(By.XPATH, '/html/body/div[3]/div/div/div/div[2]/div/div[1]/ul').text
            ctx = chrome.find_element(By.XPATH, '/html/body/div[3]/div/div/div/div[2]/div/div[2]').text
            send_mq(ctxInfo, ctx)
        finally:
            # Close the document window and give focus back to the listing,
            # so a failure does not leave a stray window open.
            chrome.close()
            chrome.switch_to.window(main_window)

        print(docName)
finally:
    chrome.quit()
|