标签 Python 下的文章

使用Python抓取12306火车信息,存入csv文件,仅介绍实现思路


马上就要年底过年了,提前去网站上抢回家的票。神奇地抢到了硬座票,心里很是欣慰,今年终于不用站着回家了。
但是开心的时间总是那么短暂,一周过后我回来继续抢返程的火车票。真的是查了好几遍,都没有返程的车,多方验证之后发现没有返程的列车,所以……

网站抓取思路

打开12306的官方网站,按F12打开你的开发人员工具,找到Network标签,F5刷新一下,获取车站信息。

列车车次信息列表

获取车站信息

部分数据

var station_names ='@bjb|北京北|VAP|beijingbei|bjb|0@bjd|北京东|BOP|beijingdong|bjd|1@bji|北京|BJP|beijing|bj|2@bjn|北京南|VNP|beijingnan|bjn|3@bjx|北京西|BXP|beijingxi|bjx|4@gzn|广州南|IZQ|guangzhounan|gzn|…………
获取所有车次信息(朋友提供)

列车车次信息列表

车次详细信息查询

列车车次信息查询页面

查询到的信息

车次详细信息查询接口(G126为例)

按自己需求实现部分功能,部分功能不对外开放,仅介绍抓取思路
# -*- coding:utf-8 -*-
'''
车次信息查询页面
https://kyfw.12306.cn/otn/queryTrainInfo/init
车次信息查询接口
https://kyfw.12306.cn/otn/queryTrainInfo/query?leftTicketDTO.train_no=5i0000G12600&leftTicketDTO.train_date=2020-01-24&rand_code=

车站信息
https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9138
车次信息
https://kyfw.12306.cn/otn/resources/js/query/train_list.js
'''
import csv
import json

import requests


class MainApp(object):
    """Scrape public 12306 station and train-number data into CSV files.

    ``start()`` downloads the station list and the train list through one
    shared ``requests`` session and writes ``station_name.csv`` and
    ``train_list.csv`` into the current working directory.
    """

    STATION_NAME_URL = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js'
    TRAIN_LIST_URL = 'https://kyfw.12306.cn/otn/resources/js/query/train_list.js'

    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the script forever.
    REQUEST_TIMEOUT = 30

    # (category key in train_list.js, label written to the CSV), in the order
    # the categories are written to the output file.
    TRAIN_CATEGORIES = (
        ('G', '高铁'),
        ('C', '城际高铁'),
        ('D', '动车'),
        ('Z', '直达'),
        ('T', '特快'),
        ('K', '快速'),
        ('O', '其他'),
    )

    def __init__(self):
        # One session is reused for every request made by this object.
        self.session = requests.session()

    def start(self):
        """Run both scraping steps: stations first, then the train list."""
        self.get_station_name()
        self.get_train_list()

    def _fetch_text(self, url):
        """Return the body of ``url`` as text, raising on HTTP error statuses."""
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of trying to parse an error page as data.
        response.raise_for_status()
        return response.text

    @staticmethod
    def _parse_train_list(text):
        """Decode the JSON object embedded in a ``var train_list ={...}`` script.

        Raises:
            ValueError: If ``text`` contains no ``{...}`` span or the span is
                not valid JSON.
        """
        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end < start:
            raise ValueError('no JSON object found in train list response')
        # Slicing between the outermost braces drops the JavaScript
        # assignment prefix and any trailing semicolon or whitespace.
        return json.loads(text[start:end + 1])

    @staticmethod
    def _split_train_code(station_train_code):
        """Split ``'G126(A-B)'`` into ``('G126', 'A', 'B')``.

        Parts that are missing from the input come back as empty strings, so
        a code without a ``(from-to)`` suffix yields ``(code, '', '')``.
        """
        number, _, route = station_train_code.partition('(')
        if route.endswith(')'):
            route = route[:-1]
        origin, _, destination = route.partition('-')
        return number, origin, destination

    @staticmethod
    def _parse_station_names(text):
        """Turn the ``station_names`` script into a list of field lists.

        Records are separated by ``@`` and the fields inside a record by
        ``|``. Splitting on ``@`` first keeps the last field of one record
        from being fused with the first field of the next one.
        """
        start = text.find("'")
        end = text.rfind("'")
        # The payload is the single-quoted string literal; if no quotes are
        # present, treat the whole text as the payload.
        payload = text[start + 1:end] if 0 <= start < end else text
        return [record.split('|') for record in payload.split('@') if record.strip()]

    # Scrape train-number information.
    def get_train_list(self, date='2019-10-10'):
        """Download the train list and write one day's trains to train_list.csv.

        Each output row holds: train_no, category label, category code,
        train number, origin, destination, date. The file is written as
        UTF-8 through ``csv.writer``.

        Args:
            date: Key of the day to export, as it appears in the downloaded
                data. Defaults to the date the script originally hard-coded.

        Raises:
            KeyError: If the downloaded data has no entry for ``date``.
            ValueError: If the response does not contain a JSON object.
        """
        trains_by_category = self._parse_train_list(self._fetch_text(self.TRAIN_LIST_URL))[date]
        with open('train_list.csv', 'w', newline='', encoding='utf-8') as train_list_csv:
            writer = csv.writer(train_list_csv)
            writer.writerow(['字段1', '字段2', '字段3', '字段4', '字段5', '字段6', '字段7'])
            for code, label in self.TRAIN_CATEGORIES:
                # A day without trains in some category is skipped rather
                # than aborting the whole export.
                for train in trains_by_category.get(code, []):
                    print(train)
                    number, origin, destination = self._split_train_code(
                        str(train['station_train_code']))
                    writer.writerow(
                        [train['train_no'], label, code, number, origin, destination, date])

    # Scrape railway-station information.
    def get_station_name(self):
        """Download the station list and write it to station_name.csv.

        One station is written per row, with its ``|``-separated fields as
        columns and without the leading ``@`` record marker. The file is
        written as UTF-8 through ``csv.writer``.
        """
        stations = self._parse_station_names(self._fetch_text(self.STATION_NAME_URL))
        print(stations)
        with open('station_name.csv', 'w', newline='', encoding='utf-8') as station_name_csv:
            writer = csv.writer(station_name_csv)
            writer.writerow(['字段1', '字段2', '字段3', '字段4', '字段5', '字段6'])
            writer.writerows(stations)

if __name__ == '__main__':
    # Script entry point: build the scraper and run it.
    MainApp().start()

抓取数据存入csv,后续会入库,提供个性化的查询,也会给出快捷跳转至12306官网查询页面,直接进行购票。

模拟正常人行为进行查询获取公共数据,防止对网站造成压力(我相信我做不到)

不做商业使用,仅在个人网站展示,展示功能不保证高可用,仅作学习交流分享使用。