import
json
import
re
import
PySimpleGUI as sg
import
requests
# Select the 'reddit' PySimpleGUI look-and-feel before any window is built.
sg.change_look_and_feel('reddit')
def tool_find(tx1, tx2, tx3):
    """Return every shortest run of text in ``tx3`` found between ``tx1`` and ``tx2``.

    This wraps the "text between two markers" regular expression that the
    scraping code needs over and over.

    ``tx1`` and ``tx2`` are inserted into the pattern verbatim (they are NOT
    escaped), so regex metacharacters inside them keep their regex meaning.
    The dot matches newlines as well, so a hit may span several lines.

    Returns the list of captured strings, which is empty when nothing matches.
    """
    between = "{start}(.*?){end}".format(start=tx1, end=tx2)
    matcher = re.compile(between, flags=re.DOTALL)
    return matcher.findall(tx3)
class DOWNLOAD():
    """Resolve a lanzous.com share page to its file link and save the file.

    Typical flow: ``step1(share_url)`` returns ``(link, name)``; that pair is
    then handed to ``download(link, name)``.

    Attributes:
        res: the ``requests`` session shared by every request.
        cookie: cookie text scraped from the first response, or ``""`` when
            none could be extracted.
    """

    # Browser user-agent sent with the page and ajax requests.
    USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36')
    # Endpoint that exchanges a page "sign" for the file location.
    AJAX_URL = 'https://www.lanzous.com/ajaxm.php'

    def __init__(self):
        """Create the session and capture the cookie from the site's front page."""
        self.res = requests.session()
        jar_text = str(self.res.get("https://www.lanzous.com").cookies)
        # The cookie is taken from the jar's textual form: "<Cookie k=v for domain/>".
        found = tool_find("<Cookie ", " for", jar_text)
        # No match used to raise IndexError here; fall back to "no explicit cookie".
        self.cookie = found[0] if found else ""
        print("cookie: " + self.cookie)

    def _headers(self, referer=None):
        """Build the request headers: user-agent, optional referer, cookie if known."""
        headers = {'user-agent': self.USER_AGENT}
        if referer is not None:
            headers['referer'] = referer
        if self.cookie:
            headers['cookie'] = self.cookie
        return headers

    def step1(self, url):
        """Fetch the share page at ``url`` and resolve it.

        Returns:
            ``(link, name)``: ``link`` is the real file link (it still needs
            the redirect handled by ``download``); ``name`` is the file name
            used when writing the file.
        """
        r = self.res.get(url, headers=self._headers())
        # A password input box in the page means the share is protected.
        if tool_find('<div class="passwddiv-input">', '/>', r.text):
            return self.download_psw(url, r)
        return self.download_nopsw(url, r)

    def download_psw(self, url, r):
        """Resolve a password-protected share.

        Prompts the user for the password in a small window, then posts it
        together with the page's ``sign`` value to the ajax endpoint.

        Args:
            url: the share page address (sent as referer).
            r: the response already fetched for ``url``.

        Returns:
            ``(link, name)`` built from the ``dom``, ``url`` and ``inf``
            fields of the JSON reply.

        Raises:
            RuntimeError: the password window was closed without submitting.
        """
        window = sg.Window(
            '请输入密码:',
            layout=[[sg.Text('请输入密码'), sg.Input(), sg.Button('确定')]],
        )
        _event, values = window.Read()
        window.close()
        if values is None:
            # Closing the window yields no values; indexing it used to raise TypeError.
            raise RuntimeError('password window was closed without a password')
        psw = values[0]

        # The page embeds "data : 'action=...&sign=XXX&p='+pwd,"; take the
        # second "&" field and drop its leading "sign=".
        sign = tool_find("data : '", ',', r.text)[0].split('&')[1][5:]
        data = {
            'action': 'downprocess',
            'sign': sign,
            'p': psw,
        }
        reply = self.res.post(
            self.AJAX_URL, data=data, headers=self._headers(referer=url)
        ).text
        info = json.loads(reply)
        return info['dom'] + '/file/' + info['url'], info['inf']

    def download_nopsw(self, url, r):
        """Resolve a share that has no password.

        Args:
            url: the share page address (sent as referer for the iframe page).
            r: the response already fetched for ``url``.

        Returns:
            ``(link, name)``: the link comes from the ajax reply and the name
            is scraped from the share page.
        """
        # The second <iframe> on the page is the one whose src is followed.
        frame = tool_find("<iframe", "</iframe", r.text)[1]
        url_ = tool_find('src="', '"', frame)[0]

        # The file name is the text of the first <div style="font-size..."> element.
        name = tool_find('<div style="font-size', '/div', r.text)
        name = tool_find('>', '<', name[0])[0]
        print(name)

        r = self.res.get(
            'https://www.lanzous.com/' + url_, headers=self._headers(referer=url)
        )
        # The iframe page declares the sign as:  var sg = '...';
        sign = tool_find("var sg = '", "';", r.text)[0]
        data = {
            'action': 'downprocess',
            'sign': sign,
            'ves': '1',
        }
        reply = self.res.post(
            self.AJAX_URL, data=data, headers=self._headers(referer=url_)
        ).json()
        url = reply['dom'] + '/file/' + reply['url']
        return url, name

    def download(self, url, name):
        """Follow the redirect behind ``url`` and write the payload to ``name``.

        Args:
            url: link returned by ``step1``.
            name: path of the file to create (opened in binary write mode).

        NOTE(review): when ``name`` comes from ``step1`` it is text taken from
        the remote page/JSON and is used as the local path unchanged; consider
        validating it before use.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        }
        # HEAD is used only to read the redirect target. If the server answers
        # without a Location header, fetch ``url`` itself instead of raising
        # KeyError.
        target = self.res.head(url, headers=headers).headers.get('Location', url)
        payload = self.res.get(target, headers=headers).content
        # Write the downloaded bytes to disk.
        with open(name, 'wb') as f:
            f.write(payload)
class GUI():
    """Small PySimpleGUI front end: ask for a share address and download it."""

    def __init__(self):
        """Create the DOWNLOAD worker that performs all network activity."""
        self.work = DOWNLOAD()

    def setup(self):
        """Show the address prompt once, then resolve and download the file.

        Returns without doing anything when the window is closed or the
        address field is left blank.
        """
        layout = [[sg.Text("请输入地址"), sg.Input(), sg.Button("确定")]]
        window = sg.Window("蓝奏云下载", layout=layout)
        _event, values = window.Read()
        window.close()

        if values is None:
            # Closing the window yields no values; indexing it used to raise TypeError.
            return
        # Surrounding whitespace from a pasted address is not part of the URL.
        address = (values[0] or "").strip()
        if not address:
            return

        # interim is the (link, name) pair returned by step1.
        interim = self.work.step1(address)
        print(interim)
        self.work.download(interim[0], interim[1])
# Script entry: build the GUI (its constructor creates the DOWNLOAD worker)
# and run the single prompt -> resolve -> download pass.
x = GUI()
x.setup()