| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- import httpx
- import time
- from datetime import datetime, timedelta
- import retrying
- import execjs
- from lxml import etree
- import json
- from loguru import logger
- import threading
- from queue import Queue
- import requests
class GK:
    """Collect Jetstar flight-search pages for a range of departure dates.

    The class compiles a local JavaScript file that produces the
    ``sensor_data`` payloads, posts them to obtain the ``bm_sz`` cookie, and
    then requests the search-flights page once per date (PVG -> NRT, one
    adult, economy, CNY), printing the JSON embedded in each page.
    """

    # JA3-style fingerprint string carried over from the original code (it was
    # a local named ``ja3``). It is not passed to the httpx client anywhere in
    # this class.
    JA3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,5-10-11-17613-45-43-0-23-18-65037-27-35-13-51-65281-16-41,4588-29-23-24,0"

    def __init__(self):
        """Compile the sensor-data script and set up the client and headers.

        Raises:
            OSError: if ``./akm逆向/逆向.js`` cannot be opened.
        """
        self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"
        with open('./akm逆向/逆向.js', encoding='utf-8') as f:
            js = f.read()
        self.ctx = execjs.compile(js)
        # NOTE(review): verify=False turns off TLS certificate verification.
        self.client = httpx.Client(
            http2=True,
            verify=False
        )
        # NOTE(review): sec-ch-ua advertises Chrome 135 while user-agent says
        # Chrome/120 -- confirm which one the sensor script expects.
        self.headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://booking.jetstar.com/",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }

    def get_cookie(self):
        """Post two sensor payloads so the client holds a ``bm_sz`` cookie.

        The first POST sends the result of the script's ``encrypt1`` function
        as form data. The ``bm_sz`` cookie from that response is passed to
        ``encrypt2``, and the result is sent in a second POST as a JSON
        string. Both requests go through ``self.client``.
        """
        logger.debug('正在获取 cookie bm-sz...')
        # akm js file url
        akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"

        first_payload = {
            'sensor_data': self.ctx.call('encrypt1')
        }
        response = self.client.post(akm_url, headers=self.headers, data=first_payload)
        bmsz = response.cookies.get('bm_sz')

        second_payload = json.dumps({
            "sensor_data": self.ctx.call('encrypt2', bmsz)
        })
        # The body is already serialized text, so it goes through ``content=``;
        # httpx deprecates passing a str via ``data=`` and sends the same bytes.
        self.client.post(akm_url, headers=self.headers, content=second_payload)
        logger.debug(f'成功获取到 bm-sz :{bmsz}')

    @retrying.retry(stop_max_attempt_number=2)
    def send_get(self, url, params):
        """GET ``url`` with ``params`` and return the response.

        When the server answers 302, the select-flights page is requested
        instead and that response is returned. Cookies, status codes and
        bodies are printed for debugging. The decorator allows at most two
        attempts when an exception is raised.
        """
        print(dict(self.client.cookies))
        response = self.client.get(
            url,
            headers=self.headers,
            params=params,
        )
        print(response.status_code)
        print(response.text)
        if response.status_code == 302:
            url = 'https://booking.jetstar.com/hk/zh/booking/select-flights'
            response = self.client.get(url, headers=self.headers)
            print(response.status_code)
            print(response.text)
        return response

    @retrying.retry(stop_max_attempt_number=3)
    def get_data(self, datetime_str):
        """Fetch the search page for one departure date.

        Args:
            datetime_str: departure date, sent as ``departuredate1``.

        Returns:
            A ``(datetime_str, response)`` tuple. ``response`` is ``None``
            when the fetched response is falsy, so callers can always unpack
            the result.

        Raises:
            Exception: whatever ``send_get`` raised, after the error is
                logged and the cookie is refreshed. The decorator allows at
                most three attempts.
        """
        params = {
            "s": "true",
            "adults": "1",  # adults
            "children": "0",  # children
            "infants": "0",  # infants
            "selectedclass1": "economy",  # cabin class: economy
            "currency": "CNY",  # currency
            "mon": "true",
            "channel": "DESKTOP",
            "origin1": "PVG",  # origin
            "destination1": "NRT",  # destination
            "departuredate1": datetime_str  # departure date
        }
        logger.info(f'正在采集 {datetime_str} 航班数据...')
        try:
            response = self.send_get(self.search_flights_api, params)
            if not response:
                # Keep the 2-tuple shape; ``run`` unpacks this result and
                # ``parse_data`` skips falsy responses.
                return datetime_str, None
            return datetime_str, response
        except Exception as e:
            logger.error(e)
            # Refresh the cookie before the retry decorator tries again.
            self.get_cookie()
            raise

    def parse_data(self, datetime_str, response):
        """Print the JSON found in the page's ``bundle-data-v2`` script tag.

        Does nothing when ``response`` is falsy. When the script tag is
        absent, a warning is logged and the raw page text is printed.
        """
        if not response:
            return
        html = etree.HTML(response.text)
        data = html.xpath("//script[@id='bundle-data-v2']/text()")
        if data:
            json_data = json.loads(data[0])
            print(datetime_str, ' => ', json_data)
        else:
            logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
            print(response.text)

    @staticmethod
    def gen_datetime(start_date, end_date):
        """Return every date from ``start_date`` to ``end_date`` inclusive.

        Args:
            start_date: first date, formatted ``YYYY-MM-DD``.
            end_date: last date, formatted ``YYYY-MM-DD``.

        Returns:
            A list of ``YYYY-MM-DD`` strings, one per day; empty when
            ``start_date`` is after ``end_date``.

        Raises:
            ValueError: if either argument does not match ``%Y-%m-%d``.
        """
        first_day = datetime.strptime(start_date, '%Y-%m-%d')
        last_day = datetime.strptime(end_date, '%Y-%m-%d')
        day_count = (last_day - first_day).days + 1
        return [
            (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(day_count)
        ]

    def run(self, start_date, end_date):
        """Obtain the cookie, then fetch and parse each date in the range.

        Sleeps two seconds after each date.
        """
        self.get_cookie()
        # Walk the collection window one day at a time.
        for datetime_str in self.gen_datetime(start_date, end_date):
            datetime_str, response = self.get_data(datetime_str)
            self.parse_data(datetime_str, response)
            time.sleep(2)
if __name__ == '__main__':
    # Script entry point: collect the fixed departure-date window.
    date_window = {'start_date': '2025-05-15', 'end_date': '2025-05-27'}
    gk = GK()
    gk.run(**date_window)
|