"""Collect Jetstar flight-search result pages for a range of departure dates.

The script first obtains a ``bm_sz`` cookie by posting ``sensor_data`` payloads
(generated by a local JavaScript file run through ``execjs``) to the site's
"akm" endpoint, then requests the search-flights page once per day and prints
the JSON embedded in the ``bundle-data-v2`` script tag.
"""
from datetime import datetime, timedelta
import json
import threading
import warnings
from queue import Queue
from typing import List

import execjs
import requests
import retrying
from loguru import logger
from lxml import etree
from requests.exceptions import Timeout
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Disable SSL-related warnings (recommended): every request below is sent
# with verify=False, which would otherwise emit a warning per request.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)  # Optional: filter other warnings


class GK:
    """Scraper for the Jetstar search-flights page.

    Attributes set in ``__init__``:
        search_flights_api: URL of the search-flights page.
        headers: browser-like request headers sent with every request.
        ctx: compiled ``execjs`` context exposing ``encrypt1`` / ``encrypt2``.
        session: ``requests.Session`` carrying the cookies between requests.
        cookies_queue: an empty ``Queue``; no method in this file uses it.
    """

    # Seconds to wait for any single HTTP request before giving up.
    REQUEST_TIMEOUT = 15

    def __init__(self, js_path='./akm逆向/逆向.js'):
        """Build headers, compile the sensor-data JavaScript and open a session.

        Args:
            js_path: path of the JavaScript file that provides the
                ``encrypt1`` and ``encrypt2`` functions. A relative path is
                resolved against the current working directory.
        """
        self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate, br',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=0, i',
            'referer': 'https://booking.jetstar.com/',
            'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1'
        }
        with open(js_path, encoding='utf-8') as f:
            js = f.read()
        self.ctx = execjs.compile(js)
        self.session = requests.Session()
        # NOTE: proxy support (a ``self.proxies`` mapping passed to each
        # request) was sketched here previously and is currently disabled.
        # If it is re-enabled, load proxy credentials from configuration
        # instead of embedding them in the source.
        self.cookies_queue = Queue()

    def get_cookie(self):
        """Obtain the ``bm_sz`` cookie and submit the follow-up sensor payload.

        Two POSTs are sent to the same endpoint through ``self.session``: the
        first carries ``encrypt1()`` output as form data and is expected to
        set ``bm_sz``; the second carries ``encrypt2(bm_sz)`` output as a JSON
        string. Cookies set by either response stay on the session.

        Raises:
            KeyError: the first response did not set a ``bm_sz`` cookie.
            requests.exceptions.RequestException: a request failed or timed
                out.
        """
        logger.debug('正在获取 cookie bm-sz...')
        # akm js file url
        akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"

        first_payload = {'sensor_data': self.ctx.call('encrypt1')}
        response = self.session.post(
            akm_url,
            headers=self.headers,
            data=first_payload,
            # Without a timeout a stalled connection blocks the run forever.
            timeout=self.REQUEST_TIMEOUT,
            verify=False,
        )

        bmsz = response.cookies.get_dict().get('bm_sz')
        if bmsz is None:
            # Same exception type as a plain dict lookup, with context added.
            raise KeyError(
                f'bm_sz cookie missing from sensor response '
                f'(HTTP {response.status_code})'
            )

        # The second payload is sent as a JSON string, not as form fields.
        second_payload = json.dumps({"sensor_data": self.ctx.call('encrypt2', bmsz)})
        self.session.post(
            akm_url,
            headers=self.headers,
            data=second_payload,
            timeout=self.REQUEST_TIMEOUT,
            verify=False,
        )
        logger.debug(f'成功获取到 bm-sz :{bmsz}')

    @retrying.retry(stop_max_attempt_number=2)
    def send_get(self, url, params):
        """GET ``url`` with ``params`` through the session.

        The ``retrying`` decorator allows up to two attempts.

        Returns:
            The ``requests.Response`` for a successful request.

        Raises:
            requests.exceptions.HTTPError: the final attempt returned a
                4xx/5xx status.
            requests.exceptions.RequestException: the final attempt failed or
                timed out.
        """
        response = self.session.get(
            url,
            headers=self.headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
            verify=False,
        )
        response.raise_for_status()
        return response

    @retrying.retry(stop_max_attempt_number=3)
    def get_data(self, datetime_str, origin="PVG", destination="NRT"):
        """Fetch the search-flights page for one departure date.

        On any failure the error is logged, the session is replaced, the
        cookie is fetched again and the exception is re-raised so that the
        ``retrying`` decorator (up to three attempts) can try once more.

        Args:
            datetime_str: departure date, sent as ``departuredate1``.
            origin: origin airport code, sent as ``origin1``.
            destination: destination airport code, sent as ``destination1``.

        Returns:
            A ``(datetime_str, response)`` tuple. ``response`` is ``None`` if
            the response object is falsy, so callers can always unpack.
        """
        params = {
            "s": "true",
            "adults": "1",  # adults
            "children": "0",  # children
            "infants": "0",  # infants
            "selectedclass1": "economy",  # cabin class: economy
            "currency": "CNY",  # currency
            "mon": "true",
            "channel": "DESKTOP",
            "origin1": origin,  # origin
            "destination1": destination,  # destination
            "departuredate1": datetime_str  # departure date
        }
        logger.info(f'正在采集 {datetime_str} 航班数据...')
        try:
            response = self.send_get(self.search_flights_api, params)
        except Exception as e:
            logger.error(e)
            # Release the old connection pool before starting a fresh session.
            self.session.close()
            self.session = requests.Session()
            self.get_cookie()
            raise
        if not response:
            # Returning a bare None here would break tuple unpacking in run().
            return datetime_str, None
        return datetime_str, response

    def parse_data(self, datetime_str, response):
        """Print the JSON embedded in the page's ``bundle-data-v2`` script tag.

        If the tag is absent (or the body cannot be parsed as HTML) a warning
        is logged and the raw body is printed for inspection. Nothing is
        returned.

        Args:
            datetime_str: departure date, used only in the output.
            response: the page response; a falsy value is ignored.
        """
        if not response:
            return

        body = response.text
        # An empty body cannot be parsed into a tree, so skip the parser.
        html = etree.HTML(body) if body.strip() else None
        scripts = [] if html is None else html.xpath("//script[@id='bundle-data-v2']/text()")

        if not scripts:
            logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
            print(body)
            return

        try:
            json_data = json.loads(scripts[0])
        except json.JSONDecodeError as e:
            logger.warning(f'{datetime_str} bundle-data-v2 is not valid JSON: {e}')
            return
        print(datetime_str, ' => ', json_data)

    @staticmethod
    def gen_datetime(start_date, end_date) -> List[str]:
        """Return every date from ``start_date`` to ``end_date`` inclusive.

        Both arguments and all returned items are ``'%Y-%m-%d'`` strings. The
        result is empty when ``start_date`` is later than ``end_date``.

        Raises:
            ValueError: an argument does not match ``'%Y-%m-%d'``.
        """
        first_day = datetime.strptime(start_date, '%Y-%m-%d')
        last_day = datetime.strptime(end_date, '%Y-%m-%d')
        span = (last_day - first_day).days
        # Stored as strings; a negative span yields an empty range.
        return [
            (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(span + 1)
        ]

    def run(self, start_date, end_date, origin="PVG", destination="NRT"):
        """Fetch and parse one search page per day in the given date range.

        Args:
            start_date: first departure date, ``'%Y-%m-%d'``.
            end_date: last departure date (inclusive), ``'%Y-%m-%d'``.
            origin: origin airport code passed to ``get_data``.
            destination: destination airport code passed to ``get_data``.
        """
        self.get_cookie()
        # Iterate over the dates to collect.
        for day in self.gen_datetime(start_date, end_date):
            datetime_str, response = self.get_data(day, origin, destination)
            self.parse_data(datetime_str, response)


if __name__ == '__main__':
    gk = GK()
    gk.run(start_date='2025-05-15', end_date='2025-05-27')