请求-requests版.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. from datetime import datetime, timedelta
  2. import requests
  3. from requests.exceptions import Timeout
  4. import retrying
  5. import execjs
  6. from lxml import etree
  7. import json
  8. from loguru import logger
  9. import threading
  10. from queue import Queue
  11. #
  12. # 禁用SSL相关警告 (推荐)
  13. from requests.packages.urllib3.exceptions import InsecureRequestWarning
  14. import warnings
  15. requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
  16. warnings.filterwarnings("ignore", category=DeprecationWarning) # 可选:过滤其他警告
  17. class GK:
  18. def __init__(self):
  19. self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"
  20. self.headers = {
  21. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
  22. 'Accept-Encoding': 'gzip, deflate, br',
  23. 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  24. 'Connection': 'keep-alive',
  25. 'Content-Type': 'application/x-www-form-urlencoded',
  26. 'accept-language': 'zh-CN,zh;q=0.9',
  27. 'cache-control': 'no-cache',
  28. 'pragma': 'no-cache',
  29. 'priority': 'u=0, i',
  30. 'referer': 'https://booking.jetstar.com/',
  31. 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
  32. 'sec-ch-ua-mobile': '?0',
  33. 'sec-ch-ua-platform': '"Windows"',
  34. 'sec-fetch-dest': 'document',
  35. 'sec-fetch-mode': 'navigate',
  36. 'sec-fetch-site': 'same-origin',
  37. 'sec-fetch-user': '?1',
  38. 'upgrade-insecure-requests': '1'
  39. }
  40. with open('./akm逆向/逆向.js', encoding='utf-8') as f:
  41. js = f.read()
  42. self.ctx = execjs.compile(js)
  43. self.session = requests.Session()
  44. # self.ip = 100000000000
  45. # self.proxies = {
  46. # 'http': f'http://B_3351_HK___5_ss-{self.ip}:ev2pjj@proxy.renlaer.com:7778',
  47. # 'https': f'http://B_3351_HK___5_ss-{self.ip}:ev2pjj@proxy.renlaer.com:7778'
  48. # }
  49. # self.proxies = {
  50. # 'http': f'127.0.0.1:8888',
  51. # 'https': f'127.0.0.1:8888'
  52. # }
  53. self.cookies_queue = Queue()
  54. def get_cookie(self):
  55. logger.debug('正在获取 cookie bm-sz...')
  56. # akm js file url
  57. akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"
  58. data = {
  59. 'sensor_data': self.ctx.call('encrypt1')
  60. }
  61. response = self.session.post(akm_url, headers=self.headers, verify=False, data=data,
  62. # proxies=self.proxies
  63. )
  64. # print(response.text)
  65. bmsz = response.cookies.get_dict()['bm_sz']
  66. data2 = {
  67. "sensor_data": self.ctx.call('encrypt2', bmsz)
  68. }
  69. data2 = json.dumps(data2)
  70. response2 = self.session.post(akm_url, headers=self.headers, data=data2, verify=False,
  71. # proxies=self.proxies
  72. )
  73. # print(response2.text)
  74. # print(response2.status_code)
  75. logger.debug(f'成功获取到 bm-sz :{bmsz}')
  76. @retrying.retry(stop_max_attempt_number=2)
  77. def send_get(self, url, params):
  78. response = self.session.get(
  79. url,
  80. headers=self.headers, params=params,
  81. timeout=15,
  82. verify=False,
  83. )
  84. response.raise_for_status()
  85. return response
  86. @retrying.retry(stop_max_attempt_number=3)
  87. def get_data(self, datetime_str):
  88. params = {
  89. "s": "true",
  90. "adults": "1", # 成年人
  91. "children": "0", # 儿童
  92. "infants": "0", # 婴儿
  93. "selectedclass1": "economy", # 选择类型:经济舱
  94. "currency": "CNY", # 货币
  95. "mon": "true",
  96. "channel": "DESKTOP",
  97. "origin1": "PVG", # 出发地
  98. "destination1": "NRT", # 目的地
  99. "departuredate1": datetime_str # 出发时间
  100. }
  101. logger.info(f'正在采集 {datetime_str} 航班数据...')
  102. try:
  103. response = self.send_get(self.search_flights_api, params)
  104. if not response:
  105. return
  106. return datetime_str, response
  107. # print(response.text)
  108. except Exception as e:
  109. logger.error(e)
  110. self.session = requests.Session()
  111. # self.ip += 1
  112. self.get_cookie()
  113. raise
  114. # return datetime_str, None
  115. def parse_data(self, datetime_str, response):
  116. if not response:
  117. return
  118. html = etree.HTML(response.text)
  119. data = html.xpath("//script[@id='bundle-data-v2']/text()")
  120. if data:
  121. json_data = json.loads(data[0])
  122. print(datetime_str, ' => ', json_data)
  123. else:
  124. logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
  125. print(response.text)
  126. @staticmethod
  127. def gen_datetime(start_date, end_date):
  128. current_date = datetime.strptime(start_date, '%Y-%m-%d')
  129. end_date = datetime.strptime(end_date, '%Y-%m-%d')
  130. date_list = []
  131. while current_date <= end_date:
  132. date_list.append(current_date.strftime('%Y-%m-%d')) # 转换为字符串格式存储
  133. current_date += timedelta(days=1)
  134. return date_list
  135. def run(self, start_date, end_date):
  136. self.get_cookie()
  137. # 获取采集时间
  138. for num, datetime_str in enumerate(self.gen_datetime(start_date, end_date)):
  139. # if num % 4 == 0:
  140. # self.session = requests.Session()
  141. # self.get_cookie()
  142. datetime_str, response = self.get_data(datetime_str)
  143. self.parse_data(datetime_str, response)
  144. # time.sleep(2)
  145. if __name__ == '__main__':
  146. gk = GK()
  147. gk.run(start_date='2025-05-15', end_date='2025-05-27')