撰于 阅读 51

Python爬取12306登录 转载

本文为转载,目的是学习并多积累一些思路。

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""12306登陆"""
import requests
import base64
import re
def get_point(index):
    """Convert comma-separated captcha tile numbers into a coordinate string.

    Args:
        index: Tile numbers ``'1'`` to ``'8'`` separated by commas, for
            example ``'1,5'``. Whitespace around a number, full-width
            commas and empty entries (such as a trailing comma) are
            tolerated.

    Returns:
        The ``x,y`` pairs of the selected tiles, joined by commas in the
        order they were given, for example ``'37,46,37,116'``.

    Raises:
        KeyError: If an entry is not one of the eight known tile numbers.
    """
    # Fixed "x,y" string used for each numbered tile.
    points = {
        '1': '37,46',
        '2': '110,46',
        '3': '181,46',
        '4': '253,46',
        '5': '37,116',
        '6': '110,116',
        '7': '181,116',
        '8': '253,116',
    }
    # A Chinese input method readily produces the full-width comma, so treat
    # it exactly like the ASCII one before splitting.
    tokens = (token.strip() for token in index.replace('，', ',').split(','))
    # Empty tokens come from stray or trailing commas and carry no selection.
    return ','.join(points[token] for token in tokens if token)

# Create a Session so that cookies are handled automatically across requests.
session = requests.Session()
# Present a desktop-browser User-Agent on every request made via the session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
session.headers.update(headers)

# 1. Visit the login configuration endpoint first to obtain the cookies.
cookie_url = 'https://kyfw.12306.cn/otn/login/conf'
response = session.get(cookie_url)

# 2. Download the captcha image.
captcha_url = 'https://kyfw.12306.cn/passport/captcha/captcha-image64?login_site=E&module=login&rand=sjrand&1541686714134&callback=jQuery19109992892609908492_1541686371355&_=1541686371356'
response = session.get(captcha_url)
data = response.text
# The image is carried base64-encoded in the "image" field of the response
# text; fail with the response attached instead of a bare IndexError when the
# field is missing.
image_match = re.search(r'"image":"(.*?)"', data)
if image_match is None:
    raise RuntimeError('获取验证码图片失败: ' + data)
img_base64 = image_match.group(1)
# Decode the payload into raw binary data.
img_bytes = base64.b64decode(img_base64)
# Write the image to a file so the user can look at it.
with open('captcha.jpg', 'wb') as f:
    f.write(img_bytes)

# 3. Verify the captcha answer.
check_captcha = 'https://kyfw.12306.cn/passport/captcha/captcha-check?callback=jQuery19109992892609908492_1541686371355&rand=sjrand&login_site=E&_=1541686371358'
# The user enters the numbers of the correct captcha tiles (not coordinates),
# separated by commas; the captcha image is in the program directory.
response = session.get(check_captcha, params={'answer': get_point(input('请输入正确的序号>>>:'))})
res = response.text
# Treat a response without a result_code as a failed check rather than
# crashing with an IndexError.
code_match = re.search(r'"result_code":"(.*?)"', res)
code = code_match.group(1) if code_match else None
if code == '4':
    print('验证码校验成功')
    # 4. Verify the user name and password.
    login_url = 'https://kyfw.12306.cn/passport/web/login'
    form_data = {
        'username': '12306账号',  # replace with your own 12306 account
        'password': '12306密码',  # replace with your own 12306 password
        'appid': 'otn'
    }
    response = session.post(login_url, data=form_data)
    res = response.json()
    if res.get("result_code") == 0:
        print('用户名密码校验成功!')
        # 5. Obtain the authorization token.
        uamtk_url = 'https://kyfw.12306.cn/passport/web/auth/uamtk'
        response = session.post(uamtk_url, data={'appid': 'otn'})
        res = response.json()
        if res.get("result_code") == 0:
            print('获取token成功')
            # 6. Validate the token and show the server's reply.
            check_token_url = 'https://kyfw.12306.cn/otn/uamauthclient'
            response = session.post(check_token_url, data={'tk': res['newapptk']})
            print(response.text)
        else:
            print('获取token失败:', res)
    else:
        print('用户名密码校验失败:', res)
else:
    print('验证码校验失败:', res)

已有 4 条评论

  1. Each Midi Baccarat table will probably be limited to 18 gamers.

    回复
  2. Normally I do not read articles on blogs, but
    I wish to say that this write-up really compelled me to check it out
    and try it! Your writing style has surprised me. Thanks, very great post.

    回复
  3. If you want to get much from this paragraph then you
    have to apply such methods to your own webpage.

    回复
  4. There is certainly a great deal to find out about this subject.

    I really like all the points you have made.

    回复