from datetime import datetime, timedelta
import json
import time

from curl_cffi import requests
# NOTE(review): the import below resolves the standalone `requests` package,
# not `curl_cffi.requests` (the line above only binds a local name). Confirm
# that timeouts raised by the curl_cffi session are instances of this class;
# otherwise the `except Timeout` branch in `send_get` never runs.
from requests.exceptions import Timeout
import retrying
import execjs
from loguru import logger


class GK:
    """Scraper for the Jetstar flight-search page.

    The flow is: obtain anti-bot cookies by posting JS-generated
    ``sensor_data`` to ``akm_url`` (the ``akm`` naming and the ``bm_sz``
    cookie suggest Akamai Bot Manager — not confirmed by this file), then
    request the search page once per departure date and extract the JSON
    embedded in it.
    """

    def __init__(self):
        """Set up endpoints, request headers, the JS context and the session.

        Raises:
            OSError: if the sensor-data JS file cannot be opened.
        """
        self.akm_url = 'https://www.jetstar.com/c9NCrswc1aL9a_poKlkL/Y5OpJhrfcSzf/MwUVAg/SE0/adRNiWCo'
        self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"
        self.headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://booking.jetstar.com/",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
        }
        # The path is relative to the current working directory, not to this
        # file, so the script must be launched from the expected directory.
        with open('../akm/akm_5.26.js', encoding='utf-8') as f:
            js = f.read()
        # Compiled JS context exposing `encrypt1` / `encrypt2`.
        self.ctx = execjs.compile(js)
        self.session = requests.Session()

    def get_cookie(self):
        """Post ``sensor_data`` twice to ``akm_url`` to populate session cookies.

        The first POST sends the output of the JS function ``encrypt1``; the
        ``bm_sz`` cookie from its response is fed into ``encrypt2`` for the
        second POST. Cookies end up on ``self.session``.

        Raises:
            KeyError: if the first response does not set a ``bm_sz`` cookie.
        """
        # akm js file url
        akm_url = self.akm_url
        # Millisecond timestamp shared by both sensor payloads.
        statusTs = str(int(time.time() * 1000))

        data = {
            'sensor_data': self.ctx.call('encrypt1', statusTs)
        }
        # NOTE(review): verify=False disables TLS certificate verification.
        # NOTE(review): in curl_cffi's CurlHttpVersion enum the value 2 looks
        # like HTTP/1.1 rather than HTTP/2 — confirm the intended version.
        response = self.session.post(
            akm_url,
            headers=self.headers,
            verify=False,
            data=data,
            # Without `impersonate` the TLS fingerprint is curl's native one
            # rather than a browser's (and still fails detection), so it must
            # be set explicitly to get past TLS fingerprint checks.
            impersonate='chrome100',
            http_version=2,
        )
        logger.info(f'第一次请求cookie bmsz 状态吗 {response.status_code}')
        print('内容 ', response.text)
        first_cookies = response.cookies.get_dict()
        print('响应cookie ', first_cookies)

        bmsz = first_cookies.get('bm_sz')
        if bmsz is None:
            # Same exception type as the original dict lookup, but with a
            # message that says what went wrong.
            raise KeyError(
                f"'bm_sz' cookie missing from first sensor response "
                f"(HTTP {response.status_code})"
            )
        print('bmsz =>', bmsz)

        data2 = {
            "sensor_data": self.ctx.call('encrypt2', statusTs, bmsz)
        }
        # Unlike the first POST (a dict), the second body is a JSON string.
        data2 = json.dumps(data2)
        response2 = self.session.post(
            akm_url,
            headers=self.headers,
            data=data2,
            verify=False,
            impersonate='chrome101',
            http_version=2,
        )
        # Fixed: report the status of the second response, not the first.
        logger.info(f'第2次请求验证 cookie bmsz 状态吗 {response2.status_code}')
        print(response2.text)
        print(response2.cookies.get_dict())

    @retrying.retry(stop_max_attempt_number=3)
    def send_get(self, url, params):
        """GET ``url`` through the shared session, with up to three attempts.

        Args:
            url: Target URL.
            params: Query-string parameters.

        Returns:
            The response object when the status check passes.

        Raises:
            Whatever the request or ``raise_for_status()`` raises on the
            final attempt.
        """
        try:
            response = self.session.get(
                url,
                headers=self.headers,
                params=params,
                timeout=20,
                verify=False,
                # proxies=proxies
                impersonate='chrome99',
                http_version=2,
            )
            response.raise_for_status()
            print('请求返回cookie', response.cookies.get_dict())
            return response
        # A timeout may mean the cookies have gone stale; re-raise so the
        # retry decorator makes another attempt.
        except Timeout as e:
            print(f"请求超时,重新更换cookie: {e}")
            # Cookie refresh on timeout is currently disabled:
            # self.session.cookies.clear()
            # self.get_cookie()
            raise

    def get_data(self, datetime_str):
        """Fetch the PVG -> NRT search page for one date and parse its payload.

        The payload is the text of the ``<script id="bundle-data-v2">``
        element, decoded as JSON. It is printed when found; otherwise the raw
        page text is printed.

        Args:
            datetime_str: Departure date string, sent as ``departuredate1``.

        Returns:
            The decoded JSON payload, or ``None`` when there is no response
            or the script element is absent.
        """
        # Kept local so importing this module does not require lxml.
        from lxml import etree

        params = {
            "s": "true",
            "adults": "1",  # adults
            "children": "0",  # children
            "infants": "0",  # infants
            "selectedclass1": "economy",  # cabin class: economy
            "currency": "CNY",  # currency
            "mon": "true",
            "channel": "DESKTOP",
            "origin1": "PVG",  # origin
            "destination1": "NRT",  # destination
            "departuredate1": datetime_str  # departure date
        }
        response = self.send_get(self.search_flights_api, params)
        if not response:
            return None
        print(response)

        html = etree.HTML(response.text)
        data = html.xpath("//script[@id='bundle-data-v2']/text()")
        if not data:
            print(response.text)
            return None

        json_data = json.loads(data[0])
        print(datetime_str, ' => ', json_data)
        return json_data

    @staticmethod
    def gen_datetime(start_date, end_date):
        """Return every date from ``start_date`` to ``end_date`` inclusive.

        Args:
            start_date: First date, formatted like ``2025-03-09``.
            end_date: Last date, same format.

        Returns:
            A list of ``YYYY-MM-DD`` strings in ascending order; empty when
            ``end_date`` is before ``start_date``.

        Raises:
            ValueError: if either argument does not match ``%Y-%m-%d``.
        """
        first_day = datetime.strptime(start_date, '%Y-%m-%d')
        last_day = datetime.strptime(end_date, '%Y-%m-%d')
        day_count = (last_day - first_day).days + 1
        return [
            (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(day_count)
        ]

    def run(self, start_date, end_date):
        """Obtain cookies once, then fetch data for each date in the range.

        Args:
            start_date: First departure date, ``YYYY-MM-DD``.
            end_date: Last departure date, ``YYYY-MM-DD``.
        """
        self.get_cookie()
        # Periodic session/cookie renewal and a per-request sleep are not
        # performed; every date reuses the cookies obtained above.
        for datetime_str in self.gen_datetime(start_date, end_date):
            self.get_data(datetime_str)


if __name__ == '__main__':
    gk = GK()
    gk.run(start_date='2025-05-29', end_date='2025-06-29')