78 lines
2.5 KiB
Python
78 lines
2.5 KiB
Python
import time
|
||
import json
|
||
import stomp
|
||
from selenium.webdriver import Chrome
|
||
from selenium.webdriver.chrome.service import Service
|
||
from selenium.webdriver.common.by import By
|
||
from selenium.webdriver.support import expected_conditions as EC
|
||
from selenium.webdriver.support.ui import WebDriverWait
|
||
|
||
# 待查询信息
|
||
|
||
AMQHOST = "mq.ossez.com"
|
||
AMQPORT = 61616
|
||
AMQUSER = "artemis"
|
||
AMQPASS = "artemis"
|
||
TOPICNAME = "policyQueue"
|
||
|
||
begin_year = input("请输入最早年份:")
|
||
current_timestamp = time.time()
|
||
time_tuple = time.localtime(current_timestamp)
|
||
end_year = time_tuple.tm_year
|
||
|
||
wait_time = 0.5 # 等待时间
|
||
action_pixel = 100 # 鼠标滚动像素
|
||
|
||
# get网站
|
||
chrome = Chrome(service=Service(r"C:\Users\yhu\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"))
|
||
chrome.get('https://www.isharkfly.com/')
|
||
wait = WebDriverWait(chrome, 10)
|
||
rowContent = chrome.find_elements(By.XPATH, '/html/body/div[3]/div/div/div/div[4]/div/table/tbody/tr')
|
||
main_window = chrome.current_window_handle
|
||
|
||
|
||
def send_mq(ctxInfo, ctx):
|
||
for summary in ctxInfo.splitlines():
|
||
if summary.startswith('索引号'):
|
||
index_string = summary.split(':')[1]
|
||
elif summary.startswith('发文日期'):
|
||
policyDateTime = summary.split(':')[1]
|
||
elif summary.startswith('名称'):
|
||
policy_name = summary.split(':')[1]
|
||
|
||
data = {}
|
||
data['policy_index_number'] = index_string
|
||
data['policy_title'] = policy_name
|
||
data['policy_content'] = ctx
|
||
|
||
hosts = [(AMQHOST, AMQPORT)]
|
||
conn = stomp.Connection(host_and_ports=hosts, auto_content_length=False)
|
||
conn.connect(username=AMQUSER, passcode=AMQPASS, wait=True)
|
||
conn.send(body=json.dumps(data), destination=TOPICNAME)
|
||
time.sleep(1)
|
||
conn.disconnect()
|
||
|
||
|
||
for tr in rowContent:
|
||
|
||
tdList = tr.find_elements("xpath", 'td')
|
||
indexNumber = tdList[0].text
|
||
docName = tdList[1].text
|
||
tdList[1].find_element(By.TAG_NAME, 'a').click()
|
||
docDate = tdList[2].text
|
||
wait.until(EC.number_of_windows_to_be(2))
|
||
# Loop through until we find a new window handle
|
||
for window_handle in chrome.window_handles:
|
||
if window_handle != main_window:
|
||
chrome.switch_to.window(window_handle)
|
||
break
|
||
ctxInfo = chrome.find_element(By.XPATH, '/html/body/div[3]/div/div/div/div[2]/div/div[1]/ul').text
|
||
ctx = chrome.find_element(By.XPATH, '/html/body/div[3]/div/div/div/div[2]/div/div[2]').text
|
||
send_mq(ctxInfo, ctx)
|
||
chrome.close()
|
||
chrome.switch_to.window(main_window)
|
||
print(docName)
|
||
# break
|
||
|
||
chrome.quit()
|