"""Collect Jetstar flight-search pages for a range of departure dates."""

import json
import threading
import time
from datetime import datetime, timedelta
from queue import Queue

import execjs
import httpx
import requests
import retrying
from loguru import logger
from lxml import etree


class GK:
    """Scraper for the Jetstar booking search page.

    The instance owns one ``httpx.Client`` (cookies persist across requests),
    a compiled JavaScript context used to produce the ``sensor_data`` payloads,
    and the browser-like request headers sent with every call.
    """

    # TLS fingerprint string kept for reference only: nothing in this class
    # passes it to the HTTP client.
    _JA3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,5-10-11-17613-45-43-0-23-18-65037-27-35-13-51-65281-16-41,4588-29-23-24,0"

    def __init__(self):
        """Load the sensor-data script and prepare the HTTP client and headers."""
        self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"

        # The path is relative to the current working directory.
        with open('../akm/逆向.js', encoding='utf-8') as f:
            js = f.read()
        self.ctx = execjs.compile(js)

        # NOTE(review): verify=False disables TLS certificate verification.
        self.client = httpx.Client(
            http2=True,
            verify=False
        )

        # NOTE(review): "sec-ch-ua" advertises Chrome 135 while "user-agent"
        # advertises Chrome/120 - confirm whether this mismatch is intended.
        self.headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://booking.jetstar.com/",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }

    def get_cookie(self):
        """Post two sensor payloads so the client's cookie jar gets populated.

        The first POST sends the output of the script's ``encrypt1`` as form
        data; the ``bm_sz`` cookie from that response is fed to ``encrypt2``
        and the result is sent as a JSON-encoded body in a second POST.
        Cookies set by either response stay on ``self.client``.
        """
        logger.debug('正在获取 cookie bm-sz...')
        # akm js file url
        akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"

        first_payload = {
            'sensor_data': self.ctx.call('encrypt1')
        }
        response = self.client.post(akm_url, headers=self.headers, data=first_payload)

        # May be None if the server did not set the cookie; it is passed on
        # to the script unchanged either way.
        bmsz = response.cookies.get('bm_sz')

        second_payload = json.dumps({
            "sensor_data": self.ctx.call('encrypt2', bmsz)
        })
        # The body is already serialised text, so it is sent via ``content=``;
        # httpx deprecates passing raw text through ``data=``.
        self.client.post(akm_url, headers=self.headers, content=second_payload)

        logger.debug(f'成功获取到 bm-sz :{bmsz}')

    @retrying.retry(stop_max_attempt_number=2)
    def send_get(self, url, params):
        """GET ``url`` with ``params``; on a 302 fetch the select-flights page.

        Args:
            url: Address to request.
            params: Query parameters for the first request.

        Returns:
            The response of the first request, or - when that request answered
            302 - the response of the follow-up request.
        """
        print(dict(self.client.cookies))
        response = self.client.get(
            url,
            headers=self.headers,
            params=params,
        )
        print(response.status_code)
        print(response.text)

        if response.status_code == 302:
            # The redirect is followed by hand to a fixed results URL.
            url = 'https://booking.jetstar.com/hk/zh/booking/select-flights'
            response = self.client.get(url, headers=self.headers)
            print(response.status_code)
            print(response.text)
        return response

    @retrying.retry(stop_max_attempt_number=3)
    def get_data(self, datetime_str):
        """Fetch the search page for one departure date.

        Args:
            datetime_str: Departure date, sent as ``departuredate1``.

        Returns:
            A ``(datetime_str, response)`` tuple; ``response`` is ``None`` when
            the request produced a falsy response, so callers can always
            unpack the result.

        Raises:
            Exception: Whatever the request raised, after cookies have been
                refreshed; the ``retrying`` decorator then re-invokes this
                method, up to three attempts in total.
        """
        params = {
            "s": "true",
            "adults": "1",  # adults
            "children": "0",  # children
            "infants": "0",  # infants
            "selectedclass1": "economy",  # cabin class: economy
            "currency": "CNY",  # currency
            "mon": "true",
            "channel": "DESKTOP",
            "origin1": "PVG",  # origin
            "destination1": "NRT",  # destination
            "departuredate1": datetime_str  # departure date
        }
        logger.info(f'正在采集 {datetime_str} 航班数据...')
        try:
            response = self.send_get(self.search_flights_api, params)
        except Exception as e:
            logger.error(e)
            # Refresh cookies before the retry decorator calls us again.
            self.get_cookie()
            raise

        if not response:
            # Keep the tuple shape: run() unpacks two values.
            return datetime_str, None
        return datetime_str, response

    def parse_data(self, datetime_str, response):
        """Print the JSON embedded in the page's ``bundle-data-v2`` script tag.

        Args:
            datetime_str: Departure date the response belongs to (used only
                in the output).
            response: Response whose ``.text`` holds the HTML; a falsy value
                makes this method return without doing anything.
        """
        if not response:
            return

        html = etree.HTML(response.text)
        # Treat an unparseable document (html is None) like a page without
        # the data node.
        if html is not None:
            data = html.xpath("//script[@id='bundle-data-v2']/text()")
        else:
            data = []

        if data:
            json_data = json.loads(data[0])
            print(datetime_str, ' => ', json_data)
        else:
            logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
            print(response.text)

    @staticmethod
    def gen_datetime(start_date, end_date):
        """Return every date from ``start_date`` to ``end_date`` inclusive.

        Args:
            start_date: First date, formatted ``YYYY-MM-DD``.
            end_date: Last date, formatted ``YYYY-MM-DD``.

        Returns:
            List of ``YYYY-MM-DD`` strings in ascending order; empty when
            ``end_date`` is earlier than ``start_date``.

        Raises:
            ValueError: If either argument does not match ``%Y-%m-%d``.
        """
        first_day = datetime.strptime(start_date, '%Y-%m-%d')
        last_day = datetime.strptime(end_date, '%Y-%m-%d')
        day_count = (last_day - first_day).days + 1
        return [
            (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(day_count)
        ]

    def run(self, start_date, end_date):
        """Fetch and parse the search page for each date in the range.

        Args:
            start_date: First departure date, formatted ``YYYY-MM-DD``.
            end_date: Last departure date, formatted ``YYYY-MM-DD``.
        """
        self.get_cookie()
        for day in self.gen_datetime(start_date, end_date):
            datetime_str, response = self.get_data(day)
            self.parse_data(datetime_str, response)
            # Fixed pause between consecutive dates.
            time.sleep(2)


if __name__ == '__main__':
    gk = GK()
    gk.run(start_date='2025-05-15', end_date='2025-05-27')