# Source file: 请求-httpx.py (5.7 KB)
  1. import httpx
  2. import time
  3. from datetime import datetime, timedelta
  4. import retrying
  5. import execjs
  6. from lxml import etree
  7. import json
  8. from loguru import logger
  9. import threading
  10. from queue import Queue
  11. import requests
  class GK:
      """Scraper for flight-search pages on booking.jetstar.com.

      The instance owns one HTTP/2 ``httpx.Client`` (so cookies persist across
      requests) and a compiled JavaScript context used to generate the
      ``sensor_data`` payloads posted by :meth:`get_cookie`.
      """

      def __init__(self):
          """Compile the sensor-data script and set up the HTTP client and headers.

          Raises:
              OSError: if ``./akm逆向/逆向.js`` cannot be opened (the path is
                  relative to the current working directory).
          """
          self.search_flights_api = "https://booking.jetstar.com/hk/zh/booking/search-flights"
          with open('./akm逆向/逆向.js', encoding='utf-8') as f:
              js = f.read()
          self.ctx = execjs.compile(js)
          # NOTE(review): TLS certificate verification is disabled here;
          # confirm this is intentional before running on an untrusted network.
          self.client = httpx.Client(
              http2=True,
              verify=False
          )
          # NOTE(review): "sec-ch-ua" advertises Chrome 135 while "user-agent"
          # advertises Chrome/120 -- left untouched, but confirm the mismatch
          # is intended.
          self.headers = {
              "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
              "accept-language": "zh-CN,zh;q=0.9",
              "cache-control": "no-cache",
              "pragma": "no-cache",
              "priority": "u=0, i",
              "referer": "https://booking.jetstar.com/",
              "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
              "sec-ch-ua-mobile": "?0",
              "sec-ch-ua-platform": "\"Windows\"",
              "sec-fetch-dest": "document",
              "sec-fetch-mode": "navigate",
              "sec-fetch-site": "same-origin",
              "sec-fetch-user": "?1",
              "upgrade-insecure-requests": "1",
              "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
          }

      def get_cookie(self):
          """Post two ``sensor_data`` payloads so the client picks up cookies.

          The first payload comes from the script's ``encrypt1`` function; the
          ``bm_sz`` cookie of that response is fed to ``encrypt2`` to build the
          second payload. Cookies set by either response stay on ``self.client``.
          """
          logger.debug('正在获取 cookie bm-sz...')
          # URL the sensor data is posted to.
          akm_url = "https://booking.jetstar.com/MkuYlo/pcp/LD0/PPluEQ/1ik7QcJffXbmL53i/QTcvXmg7/KS5kC3N/VRQcB"

          # First round: sent as form data (a dict passed to ``data=``).
          first_payload = {
              'sensor_data': self.ctx.call('encrypt1')
          }
          response = self.client.post(akm_url, headers=self.headers, data=first_payload)
          bmsz = response.cookies.get('bm_sz')

          # Second round: the payload is a JSON *string*. httpx deprecates
          # passing text through ``data=``; ``content=`` sends the same bytes
          # without the DeprecationWarning.
          # NOTE(review): the two rounds use different body encodings (form vs.
          # raw JSON text) -- preserved as-is; confirm that is what the endpoint
          # expects.
          second_payload = json.dumps({
              "sensor_data": self.ctx.call('encrypt2', bmsz)
          })
          self.client.post(akm_url, headers=self.headers, content=second_payload)
          logger.debug(f'成功获取到 bm-sz :{bmsz}')

      @retrying.retry(stop_max_attempt_number=2)
      def send_get(self, url, params):
          """GET ``url`` with ``params`` and return the response.

          When the first response has status 302, the select-flights page is
          fetched instead and that second response is returned. Cookies, status
          codes and bodies are printed for debugging. Any exception triggers
          one more attempt via the ``retrying`` decorator.
          """
          print(dict(self.client.cookies))
          response = self.client.get(
              url,
              headers=self.headers,
              params=params,
          )
          print(response.status_code)
          print(response.text)
          if response.status_code == 302:
              url = 'https://booking.jetstar.com/hk/zh/booking/select-flights'
              response = self.client.get(url, headers=self.headers)
              print(response.status_code)
              print(response.text)
          return response

      @retrying.retry(stop_max_attempt_number=3)
      def get_data(self, datetime_str):
          """Fetch the search-flights page for one departure date.

          Args:
              datetime_str: departure date as a ``YYYY-MM-DD`` string.

          Returns:
              A ``(datetime_str, response)`` tuple. ``response`` is ``None``
              when the request produced a falsy response, so callers can always
              unpack the result.

          Raises:
              Exception: whatever ``send_get`` raised, re-raised after the
                  cookies have been refreshed so the ``retrying`` decorator
                  can try again (up to 3 attempts in total).
          """
          params = {
              "s": "true",
              "adults": "1",  # number of adults
              "children": "0",  # number of children
              "infants": "0",  # number of infants
              "selectedclass1": "economy",  # cabin class: economy
              "currency": "CNY",  # currency
              "mon": "true",
              "channel": "DESKTOP",
              "origin1": "PVG",  # origin
              "destination1": "NRT",  # destination
              "departuredate1": datetime_str  # departure date
          }
          logger.info(f'正在采集 {datetime_str} 航班数据...')
          try:
              response = self.send_get(self.search_flights_api, params)
              if not response:
                  # Previously a bare ``return`` (None), which made the tuple
                  # unpacking in ``run`` raise TypeError.
                  return datetime_str, None
              return datetime_str, response
          except Exception as e:
              logger.error(e)
              # Refresh cookies before the decorator retries the request.
              self.get_cookie()
              raise

      def parse_data(self, datetime_str, response):
          """Extract and print the JSON embedded in the result page.

          Looks for ``<script id="bundle-data-v2">`` in the HTML. When it is
          missing, a warning is logged and the raw body is printed. Does nothing
          when ``response`` is falsy.
          """
          if not response:
              return
          html = etree.HTML(response.text)
          data = html.xpath("//script[@id='bundle-data-v2']/text()")
          if data:
              json_data = json.loads(data[0])
              print(datetime_str, ' => ', json_data)
          else:
              logger.warning(f'{datetime_str} 当天暂无数据 / 触发验证码')
              print(response.text)

      @staticmethod
      def gen_datetime(start_date, end_date):
          """Return every date from ``start_date`` to ``end_date`` inclusive.

          Args:
              start_date: first day, formatted ``YYYY-MM-DD``.
              end_date: last day, formatted ``YYYY-MM-DD``.

          Returns:
              A list of ``YYYY-MM-DD`` strings in ascending order; empty when
              ``end_date`` is earlier than ``start_date``.

          Raises:
              ValueError: if either argument does not match ``%Y-%m-%d``.
          """
          first_day = datetime.strptime(start_date, '%Y-%m-%d')
          last_day = datetime.strptime(end_date, '%Y-%m-%d')
          # Both values are at midnight, so ``.days`` is the exact day count.
          day_span = (last_day - first_day).days
          return [
              (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
              for offset in range(day_span + 1)
          ]

      def run(self, start_date, end_date):
          """Collect and parse flight data for each day in the given range.

          Cookies are obtained once up front; each date is then fetched and
          parsed, with a 2-second pause between dates. An exception that
          survives the retries in ``get_data`` propagates and stops the run.
          """
          self.get_cookie()
          for datetime_str in self.gen_datetime(start_date, end_date):
              datetime_str, response = self.get_data(datetime_str)
              self.parse_data(datetime_str, response)
              time.sleep(2)
  # Script entry point: collect flight data for a fixed date window.
  if __name__ == '__main__':
      GK().run(start_date='2025-05-15', end_date='2025-05-27')