diff --git a/.idea/checkstyle-idea.xml b/.idea/checkstyle-idea.xml new file mode 100644 index 0000000..f0c4984 --- /dev/null +++ b/.idea/checkstyle-idea.xml @@ -0,0 +1,16 @@ + + + + 10.12.2 + JavaOnly + true + + + \ No newline at end of file diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml deleted file mode 100644 index 919ce1f..0000000 --- a/.idea/codeStyles/Project.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml deleted file mode 100644 index a55e7a1..0000000 --- a/.idea/codeStyles/codeStyleConfig.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 0000000..91cfaab --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,12 @@ + + + + + postgresql + true + org.postgresql.Driver + jdbc:postgresql://nas1120:5433/discourse + $ProjectFileDir$ + + + \ No newline at end of file diff --git a/.idea/jpa-buddy.xml b/.idea/jpa-buddy.xml new file mode 100644 index 0000000..966d5f5 --- /dev/null +++ b/.idea/jpa-buddy.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 7582356..72d0c33 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,12 @@ - + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 7b27fe6..e1cb261 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..640fd80 --- /dev/null +++ b/.idea/other.xml @@ -0,0 +1,7 @@ + + + + + \ No newline at end of file diff --git a/.idea/python-tutorials.iml b/.idea/python-tutorials.iml new file mode 100644 index 0000000..77e413b --- /dev/null +++ b/.idea/python-tutorials.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/.idea/sqldialects.xml b/.idea/sqldialects.xml new file mode 100644 index 0000000..df5b4fa --- /dev/null +++ b/.idea/sqldialects.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 766e236..8b92428 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ 欢迎来到 Python 教程和工具库 -GitHub 上有关 CWIKIUS 的项目:[https://github.com/cwiki-us-docs/cwikius-docs](https://github.com/cwiki-us-docs/cwikius-docs) +GitHub 上有关 CWIKIUS +的项目:[https://github.com/cwiki-us-docs/cwikius-docs](https://github.com/cwiki-us-docs/cwikius-docs) [CONTACT](CONTACT.md ':include') @@ -12,10 +13,10 @@ GitHub 上有关 CWIKIUS 的项目:[https://github.com/cwiki-us-docs/cwikius-d 在这里对原作者表示感谢。 -| 网站名称 | URL | NOTE | -|---|---|---| -| WWW.CWIKIUS.CN | [http://www.cwikius.cn/](http://www.cwikius.cn/) | CWIKIUS.CN 一个有独立思考和温度的清新站 | -| [Python - 100天从新手到大师](https://github.com/jackfrued/Python-100-Days) | [https://github.com/jackfrued/Python-100-Days](https://github.com/jackfrued/Python-100-Days) | 我们参考了原作者的一些代码和测试用例。 | -| [Python语言基础50课](https://github.com/jackfrued/Python-Core-50-Courses) | [https://github.com/jackfrued/Python-Core-50-Courses](https://github.com/jackfrued/Python-Core-50-Courses) | 参考了原作者的部分内容和编排目录 | -| [Python 脚本收集](https://github.com/fnplus/Python-scripts-collection) | [https://github.com/fnplus/Python-scripts-collection](https://github.com/fnplus/Python-scripts-collection) | 针对 Python 使用的一些实际脚本的收集 | +| 网站名称 | URL | NOTE | +|----------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|---------------------------| +| WWW.CWIKIUS.CN | [http://www.cwikius.cn/](http://www.cwikius.cn/) | CWIKIUS.CN 一个有独立思考和温度的清新站 | +| [Python - 100天从新手到大师](https://github.com/jackfrued/Python-100-Days) | [https://github.com/jackfrued/Python-100-Days](https://github.com/jackfrued/Python-100-Days) | 我们参考了原作者的一些代码和测试用例。 | +| 
[Python语言基础50课](https://github.com/jackfrued/Python-Core-50-Courses) | [https://github.com/jackfrued/Python-Core-50-Courses](https://github.com/jackfrued/Python-Core-50-Courses) | 参考了原作者的部分内容和编排目录 | +| [Python 脚本收集](https://github.com/fnplus/Python-scripts-collection) | [https://github.com/fnplus/Python-scripts-collection](https://github.com/fnplus/Python-scripts-collection) | 针对 Python 使用的一些实际脚本的收集 | diff --git a/python-tutorials.iml b/python-tutorials.iml index ad3c0a3..8021953 100644 --- a/python-tutorials.iml +++ b/python-tutorials.iml @@ -1,5 +1,5 @@ - + diff --git a/requirements.txt b/requirements.txt index d035d62..1bfd113 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,8 @@ ruamel.yaml qrcode~=6.1 Pillow pyautogui +requests +numpy +stomp.py selenium -requests \ No newline at end of file +xlrd \ No newline at end of file diff --git a/tests/DoCcrawler.py b/tests/DoCcrawler.py new file mode 100644 index 0000000..4ca51aa --- /dev/null +++ b/tests/DoCcrawler.py @@ -0,0 +1,77 @@ +import time +import json +import stomp +from selenium.webdriver import Chrome +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +# 待查询信息 + +AMQHOST = "mq.ossez.com" +AMQPORT = 61616 +AMQUSER = "artemis" +AMQPASS = "artemis" +TOPICNAME = "policyQueue" + +begin_year = input("请输入最早年份:") +current_timestamp = time.time() +time_tuple = time.localtime(current_timestamp) +end_year = time_tuple.tm_year + +wait_time = 0.5 # 等待时间 +action_pixel = 100 # 鼠标滚动像素 + +# get网站 +chrome = Chrome(service=Service(r"C:\Users\yhu\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe")) +chrome.get('https://www.isharkfly.com/') +wait = WebDriverWait(chrome, 10) +rowContent = chrome.find_elements(By.XPATH, '/html/body/div[3]/div/div/div/div[4]/div/table/tbody/tr') +main_window = chrome.current_window_handle + + +def 
# Summary-line labels mapped to the message keys they populate.
# NOTE(review): the '发文日期' (issue date) line was parsed by the earlier
# version but never published, so it is intentionally not extracted here.
SUMMARY_FIELDS = (
    ('索引号', 'policy_index_number'),
    ('名称', 'policy_title'),
)


def build_policy_message(ctxInfo, ctx):
    """Build the queue payload for one crawled document.

    Helper for ``send_mq``; kept free of I/O so it can be checked in isolation.

    Args:
        ctxInfo: Multi-line summary text; lines of interest look like
            ``<label>:<value>``.
        ctx: Body text of the document.

    Returns:
        A dict with ``policy_index_number``, ``policy_title`` and
        ``policy_content``. A field whose label never appears in *ctxInfo*
        is ``None`` instead of raising ``UnboundLocalError``.
    """
    message = {key: None for _, key in SUMMARY_FIELDS}
    for line in ctxInfo.splitlines():
        for label, key in SUMMARY_FIELDS:
            if line.startswith(label):
                # Keep everything after the FIRST ':' so values that contain
                # a colon themselves (titles, timestamps) are not truncated.
                message[key] = line.partition(':')[2]
                break
    message['policy_content'] = ctx
    return message


def send_mq(ctxInfo, ctx):
    """Publish one crawled document to the message queue as JSON.

    Args:
        ctxInfo: Summary text of the document (see ``build_policy_message``).
        ctx: Body text of the document.
    """
    data = build_policy_message(ctxInfo, ctx)

    hosts = [(AMQHOST, AMQPORT)]
    conn = stomp.Connection(host_and_ports=hosts, auto_content_length=False)
    conn.connect(username=AMQUSER, passcode=AMQPASS, wait=True)
    try:
        conn.send(body=json.dumps(data), destination=TOPICNAME)
        # presumably gives the client time to flush the frame before the
        # socket is closed -- TODO confirm this pause is still required
        time.sleep(1)
    finally:
        # Always release the connection, even when the send fails.
        conn.disconnect()


if __name__ == "__main__":
    try:
        for tr in rowContent:
            cells = tr.find_elements(By.XPATH, 'td')
            if len(cells) < 2:
                # Rows without a title cell have no document link to follow.
                continue

            docName = cells[1].text
            cells[1].find_element(By.TAG_NAME, 'a').click()
            wait.until(EC.number_of_windows_to_be(2))

            # Switch to whichever window is not the listing page.
            for window_handle in chrome.window_handles:
                if window_handle != main_window:
                    chrome.switch_to.window(window_handle)
                    break

            try:
                ctxInfo = chrome.find_element(
                    By.XPATH, '/html/body/div[3]/div/div/div/div[2]/div/div[1]/ul').text
                ctx = chrome.find_element(
                    By.XPATH, '/html/body/div[3]/div/div/div/div[2]/div/div[2]').text
                send_mq(ctxInfo, ctx)
            finally:
                # Close the detail window and return to the listing even if
                # this row failed, so the next wait for "2 windows" is valid.
                chrome.close()
                chrome.switch_to.window(main_window)

            print(docName)
    finally:
        # Shut the browser down on every exit path.
        chrome.quit()
"artemis" +AMQPASS = "artemis" +TOPICNAME = "policyQueue" + + +def send_mq(data): + hosts = [(AMQHOST, AMQPORT)] + conn = stomp.Connection(host_and_ports=hosts, auto_content_length=False) + conn.connect(username=AMQUSER, passcode=AMQPASS, wait=True) + conn.send(body=json.dumps(data), destination=TOPICNAME) + + conn.disconnect() + + +# https://policyapi.10nservice.com/api/v1/WebPolicy/GetSearchPageList?pageSize=50&pageIndex=1&postType=99&release=&years=&area=430100&platformId=3479085520414310401 + +def do_data_crawl(page_index): + URL = "https://policyapi.10nservice.com/api/v1/WebPolicy/GetSearchPageList" + PARAMS = {'pageSize': 5000, 'pageIndex': page_index, 'postType': 99, 'postType': '', 'years': '', 'area': 430100, + 'platformId': 3479085520414310401} + + # sending get request and saving the response as response object + r = requests.get(url=URL, params=PARAMS) + + # extracting data in json format + responseData = r.json() + + # Loop List + for policyList in json.loads(responseData['Data']): + pid = policyList['PID'] + policyTitle = policyList['PolicyTitle'] + detail_url = "https://policyapi.10nservice.com/api/v1/WebPolicy/GetAdoptDetails?pid=" + pid + "&platformId=3479085520414310401" + request_detail_data = requests.get(url=detail_url).json() + province_id = json.loads(request_detail_data['Data'])['ProvinceID'] + + data = {} + data['policy_index_number'] = pid + data['policy_title'] = policyTitle + data['policy_content'] = json.loads(request_detail_data['Data'])['PolicyText'] + data['policy_tag'] = json.loads(request_detail_data['Data'])['PolicyKey'] + data['release_time'] = json.loads(request_detail_data['Data'])['ReleaseTime'] + data['start_time'] = json.loads(request_detail_data['Data'])['StarTime'] + data['end_time'] = json.loads(request_detail_data['Data'])['EndTime'] + data['source_name'] = json.loads(request_detail_data['Data'])['Source'] + data['source_url'] = json.loads(request_detail_data['Data'])['PageUrl'] + + send_mq(data) + + print(policyTitle) + 
# break + + +for i in range(3, 6): + do_data_crawl(i) + # break diff --git a/tests/QRCode.py b/tests/QRCode.py index d11b80c..533409c 100644 --- a/tests/QRCode.py +++ b/tests/QRCode.py @@ -9,7 +9,7 @@ import qrcode.image.svg image_path = "resources/token_qr.png" qr_string = "https://www.ossez.com/c/open-source/python/14" -print(qr_string) +print(rebase = hex(base62.decode(base, base62.CHARSET_INVERTED))[2:].zfill(40)) img = qrcode.make(qr_string) img.save(image_path) \ No newline at end of file diff --git a/tests/StompArtemis.py b/tests/StompArtemis.py new file mode 100644 index 0000000..86ce286 --- /dev/null +++ b/tests/StompArtemis.py @@ -0,0 +1,16 @@ +import time + +import stomp + +AMQHOST = "nas1120" +AMQPORT = 61616 +AMQUSER = "artemis" +AMQPASS = "artemis" +TOPICNAME = "remotingQueue" + +hosts = [(AMQHOST, AMQPORT)] +conn = stomp.Connection(host_and_ports=hosts, auto_content_length=False) +conn.connect(username=AMQUSER, passcode=AMQPASS, wait=True) +conn.send(body='Love Python', destination=TOPICNAME) +time.sleep(320) +conn.disconnect() diff --git a/tests/resources/token_qr.png b/tests/resources/token_qr.png new file mode 100644 index 0000000..ed9c284 Binary files /dev/null and b/tests/resources/token_qr.png differ