# 请求-tls_client.py (5.8 KB)
  1. import time
  2. from datetime import datetime, timedelta
  3. from tls_client import Session
  4. from requests.exceptions import Timeout
  5. import retrying
  6. import execjs
  7. from lxml import etree
  8. import json
  9. from loguru import logger
  10. import threading
  11. from queue import Queue
  12. import requests
  13. class GK:
  14. def __init__(self):
  15. self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"
  16. with open('./akm逆向/逆向.js', encoding='utf-8') as f:
  17. js = f.read()
  18. self.ctx = execjs.compile(js)
  19. ja3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,5-10-11-17613-45-43-0-23-18-65037-27-35-13-51-65281-16-41,4588-29-23-24,0"
  20. # 基础配置(指定浏览器指纹)
  21. self.session = Session(
  22. client_identifier="Chrome_120", # 预设浏览器指纹
  23. random_tls_extension_order=True, # 随机tls指纹
  24. )
  25. self.session.http2 = True
  26. # self.session = requests.Session()
  27. self.headers = {
  28. "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  29. "accept-language": "zh-CN,zh;q=0.9",
  30. "cache-control": "no-cache",
  31. "pragma": "no-cache",
  32. "priority": "u=0, i",
  33. "referer": "https://booking.jetstar.com/",
  34. "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
  35. "sec-ch-ua-mobile": "?0",
  36. "sec-ch-ua-platform": "\"Windows\"",
  37. "sec-fetch-dest": "document",
  38. "sec-fetch-mode": "navigate",
  39. "sec-fetch-site": "same-origin",
  40. "sec-fetch-user": "?1",
  41. "upgrade-insecure-requests": "1",
  42. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
  43. }
  44. def get_cookie(self):
  45. logger.debug('正在获取 cookie bm-sz...')
  46. # akm js file url
  47. akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"
  48. data = {
  49. 'sensor_data': self.ctx.call('encrypt1')
  50. }
  51. response = self.session.post(akm_url, headers=self.headers, data=data,
  52. # proxies=self.proxies
  53. )
  54. # print(response.http_version) # 应该显示 "HTTP/2"
  55. # print(response.text)
  56. bmsz = response.cookies.get_dict()['bm_sz']
  57. data2 = {
  58. "sensor_data": self.ctx.call('encrypt2', bmsz)
  59. }
  60. data2 = json.dumps(data2)
  61. response2 = self.session.post(akm_url, headers=self.headers, data=data2,
  62. # proxies=self.proxies
  63. )
  64. # print(response2.text)
  65. # print(response2.status_code)
  66. logger.debug(f'成功获取到 bm-sz :{bmsz}')
  67. @retrying.retry(stop_max_attempt_number=2)
  68. def send_get(self, url, params):
  69. response = self.session.get(
  70. url,
  71. headers=self.headers,
  72. params=params,
  73. )
  74. if response.status_code == 302:
  75. url = 'https://booking.jetstar.com/hk/zh/booking/select-flights'
  76. response = self.session.get(url, headers=self.headers)
  77. print(response.status_code)
  78. print(response.text)
  79. return response
  80. @retrying.retry(stop_max_attempt_number=3)
  81. def get_data(self, datetime_str):
  82. params = {
  83. "s": "true",
  84. "adults": "1", # 成年人
  85. "children": "0", # 儿童
  86. "infants": "0", # 婴儿
  87. "selectedclass1": "economy", # 选择类型:经济舱
  88. "currency": "CNY", # 货币
  89. "mon": "true",
  90. "channel": "DESKTOP",
  91. "origin1": "PVG", # 出发地
  92. "destination1": "NRT", # 目的地
  93. "departuredate1": datetime_str # 出发时间
  94. }
  95. logger.info(f'正在采集 {datetime_str} 航班数据...')
  96. try:
  97. response = self.send_get(self.search_flights_api, params)
  98. if not response:
  99. return
  100. return datetime_str, response
  101. # print(response.text)
  102. except Exception as e:
  103. logger.error(e)
  104. # self.ip += 1
  105. self.get_cookie()
  106. raise
  107. # return datetime_str, None
  108. def parse_data(self, datetime_str, response):
  109. if not response:
  110. return
  111. html = etree.HTML(response.text)
  112. data = html.xpath("//script[@id='bundle-data-v2']/text()")
  113. if data:
  114. json_data = json.loads(data[0])
  115. print(datetime_str, ' => ', json_data)
  116. else:
  117. logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
  118. print(response.text)
  119. @staticmethod
  120. def gen_datetime(start_date, end_date):
  121. current_date = datetime.strptime(start_date, '%Y-%m-%d')
  122. end_date = datetime.strptime(end_date, '%Y-%m-%d')
  123. date_list = []
  124. while current_date <= end_date:
  125. date_list.append(current_date.strftime('%Y-%m-%d')) # 转换为字符串格式存储
  126. current_date += timedelta(days=1)
  127. return date_list
  128. def run(self, start_date, end_date):
  129. self.get_cookie()
  130. # 获取采集时间
  131. for num, datetime_str in enumerate(self.gen_datetime(start_date, end_date)):
  132. # if num % 4 == 0:
  133. # self.session = requests.Session()
  134. # self.get_cookie()
  135. datetime_str, response = self.get_data(datetime_str)
  136. self.parse_data(datetime_str, response)
  137. time.sleep(2)
  138. if __name__ == '__main__':
  139. gk = GK()
  140. gk.run(start_date='2025-05-15', end_date='2025-05-27')