import json
import threading
import time
from datetime import datetime, timedelta
from queue import Queue

import execjs
import requests
import retrying
from loguru import logger
from lxml import etree
from requests.exceptions import Timeout
from tls_client import Session


class GK:
    """Collect Jetstar flight-search pages for a range of departure dates.

    The instance owns a ``tls_client`` session, a compiled JavaScript context
    used to produce ``sensor_data`` payloads, and the browser-like headers
    sent with every request.
    """

    # Kept for reference only: the session below is configured through
    # ``client_identifier`` and does not read this string.
    JA3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,5-10-11-17613-45-43-0-23-18-65037-27-35-13-51-65281-16-41,4588-29-23-24,0"

    def __init__(self):
        """Compile the sensor script and build the HTTP session and headers.

        Raises:
            OSError: if ``./akm逆向/逆向.js`` cannot be opened (the path is
                resolved against the current working directory).
        """
        self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"

        with open('./akm逆向/逆向.js', encoding='utf-8') as f:
            js = f.read()
        self.ctx = execjs.compile(js)

        # Base configuration: use a preset browser fingerprint and randomise
        # the TLS extension order.
        self.session = Session(
            client_identifier="Chrome_120",
            random_tls_extension_order=True,
        )
        self.session.http2 = True

        # NOTE(review): "user-agent" advertises Chrome/120 while "sec-ch-ua"
        # advertises v=135 — confirm the mismatch is intended.
        self.headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://booking.jetstar.com/",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }

    def get_cookie(self):
        """Obtain the ``bm_sz`` cookie by posting two sensor payloads.

        The first payload comes from the script's ``encrypt1``; the second is
        produced by ``encrypt2`` from the ``bm_sz`` value returned by the
        first request. Cookies are stored on ``self.session``.

        Raises:
            KeyError: if the first response does not set ``bm_sz``.
        """
        logger.debug('正在获取 cookie bm-sz...')
        # URL of the akm script endpoint.
        akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"

        # NOTE(review): this payload is passed as a dict while the second one
        # is JSON-encoded — confirm the asymmetry is intended.
        data = {'sensor_data': self.ctx.call('encrypt1')}
        response = self.session.post(akm_url, headers=self.headers, data=data)

        cookies = response.cookies.get_dict()
        if 'bm_sz' not in cookies:
            # Same exception type as a plain lookup, but with enough context
            # to diagnose a blocked or changed endpoint.
            raise KeyError(
                f'bm_sz cookie missing from sensor response (HTTP {response.status_code})'
            )
        bmsz = cookies['bm_sz']

        data2 = json.dumps({"sensor_data": self.ctx.call('encrypt2', bmsz)})
        # Only the side effect on the session's cookies matters here.
        self.session.post(akm_url, headers=self.headers, data=data2)
        logger.debug(f'成功获取到 bm-sz :{bmsz}')

    @retrying.retry(stop_max_attempt_number=2)
    def send_get(self, url, params):
        """GET ``url`` with ``params``; on a 302, fetch the select-flights page.

        Attempted up to two times if an exception is raised.

        Returns:
            The response of the last request performed.
        """
        response = self.session.get(
            url,
            headers=self.headers,
            params=params,
        )
        if response.status_code == 302:
            url = 'https://booking.jetstar.com/hk/zh/booking/select-flights'
            response = self.session.get(url, headers=self.headers)
        # Diagnostics go through the logger rather than stdout.
        logger.debug(f'GET {url} -> {response.status_code}')
        logger.debug(response.text)
        return response

    @retrying.retry(stop_max_attempt_number=3)
    def get_data(self, datetime_str):
        """Fetch the search page for one departure date.

        Args:
            datetime_str: departure date sent as ``departuredate1``.

        Returns:
            ``(datetime_str, response)``; ``response`` is ``None`` when the
            request produced a falsy response.

        Raises:
            Exception: whatever ``send_get`` raised, re-raised after the
                cookie has been refreshed so the retry decorator can run
                another attempt (up to three in total).
        """
        params = {
            "s": "true",
            "adults": "1",  # adults
            "children": "0",  # children
            "infants": "0",  # infants
            "selectedclass1": "economy",  # cabin class: economy
            "currency": "CNY",  # currency
            "mon": "true",
            "channel": "DESKTOP",
            "origin1": "PVG",  # origin
            "destination1": "NRT",  # destination
            "departuredate1": datetime_str  # departure date
        }
        logger.info(f'正在采集 {datetime_str} 航班数据...')
        try:
            response = self.send_get(self.search_flights_api, params)
        except Exception as e:
            logger.error(e)
            # Refresh the cookie before the retry decorator tries again.
            self.get_cookie()
            raise
        if not response:
            # Always return a pair: callers unpack the result, so a bare
            # ``return`` here would raise TypeError in ``run``.
            return datetime_str, None
        return datetime_str, response

    def parse_data(self, datetime_str, response):
        """Extract and print the ``bundle-data-v2`` JSON from a search page.

        Does nothing when ``response`` is falsy. When the script tag is
        absent, logs a warning and the page body instead.
        """
        if not response:
            return
        html = etree.HTML(response.text)
        # etree.HTML may yield None for an empty/unparsable document; treat
        # that like a page without the data tag.
        data = html.xpath("//script[@id='bundle-data-v2']/text()") if html is not None else []
        if data:
            json_data = json.loads(data[0])
            print(datetime_str, ' => ', json_data)
        else:
            logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
            logger.debug(response.text)

    @staticmethod
    def gen_datetime(start_date, end_date):
        """Return every date from ``start_date`` to ``end_date`` inclusive.

        Both arguments and all returned items use the ``%Y-%m-%d`` format.
        The list is empty when ``end_date`` precedes ``start_date``.

        Raises:
            ValueError: if either argument does not match ``%Y-%m-%d``.
        """
        first_day = datetime.strptime(start_date, '%Y-%m-%d')
        last_day = datetime.strptime(end_date, '%Y-%m-%d')
        day_count = (last_day - first_day).days + 1
        return [
            (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(day_count)
        ]

    def run(self, start_date, end_date, delay=2):
        """Collect and parse flight data for each date in the range.

        Args:
            start_date: first departure date, ``%Y-%m-%d``.
            end_date: last departure date (inclusive), ``%Y-%m-%d``.
            delay: seconds to sleep after each date is processed.
        """
        self.get_cookie()
        for datetime_str in self.gen_datetime(start_date, end_date):
            datetime_str, response = self.get_data(datetime_str)
            self.parse_data(datetime_str, response)
            time.sleep(delay)


if __name__ == '__main__':
    gk = GK()
    gk.run(start_date='2025-05-15', end_date='2025-05-27')