Jelajahi Sumber

调整边界起飞时间(72小时), 发送政策接口增加两个字段

node04 3 minggu lalu
induk
komit
808a682634
4 berkas diubah dengan 19 penambahan dan 17 penghapusan
  1. 10 10
      data_preprocess.py
  2. 6 4
      descending_cabin_task.py
  3. 2 2
      main_pe_0.py
  4. 1 1
      main_tr_0.py

+ 10 - 10
data_preprocess.py

@@ -926,7 +926,7 @@ def preprocess_data_simple(df_input, is_train=False):
 
     # 训练过程
     if is_train:
-        df_target = df_input[(df_input['hours_until_departure'] >= 8) & (df_input['hours_until_departure'] <= 240)].copy()   # 扩展至240小时(10天) 
+        df_target = df_input[(df_input['hours_until_departure'] >= 72) & (df_input['hours_until_departure'] <= 240)].copy()   # 扩展至240小时(10天) 
         df_target = df_target.sort_values(
             by=['gid', 'hours_until_departure'],
             ascending=[True, False]
@@ -1073,7 +1073,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
     ).reset_index(drop=True)
 
     df_sorted = df_sorted[
-        df_sorted['hours_until_departure'].between(8, 240)
+        df_sorted['hours_until_departure'].between(72, 240)
     ].reset_index(drop=True)
 
     # 每个 gid 取 hours_until_departure 最小的一条
@@ -1082,9 +1082,9 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
         .reset_index(drop=True)
     )
 
-    # 确保 hours_until_departure 在 [8, 240] 的 范围内
+    # 确保 hours_until_departure 在 [72, 240] 的 范围内
     # df_min_hours = df_min_hours[
-    #     df_min_hours['hours_until_departure'].between(8, 240)
+    #     df_min_hours['hours_until_departure'].between(72, 240)
     # ].reset_index(drop=True)
 
     drop_info_csv_path = os.path.join(output_dir, f'{group_route_str}_drop_info.csv')
@@ -1193,14 +1193,14 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
         
     # ==================== 综合评分:包络高位 × 降价潜力 ====================
     # target_score = 包络位置(越高越好)× 降价潜力(越高越好)
-    thres_ep = 0.7
-    thres_dp = 0.3
+    thres_ep = 0.6
+    thres_dp = 0.4
     df_min_hours['target_score'] = (
         df_min_hours['envelope_position'] * thres_ep + df_min_hours['drop_potential'] * thres_dp
     ).round(4)
 
     # 综合评分阈值:大于阈值的都认为值得投放
-    target_score_threshold = 0.7
+    target_score_threshold = 0.75
     # df_min_hours['target_score_threshold'] = target_score_threshold
     df_min_hours['is_good_target'] = (df_min_hours['target_score'] >= target_score_threshold).astype(int)
 
@@ -1489,7 +1489,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
     df_min_hours["update_hour"] = _pred_dt.strftime("%Y-%m-%d %H:%M:%S")
     _dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
     df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(240, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
-    df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(8, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
+    df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(72, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
 
     # 要展示在预测表里的字段
     order_cols = ['city_pair', 'flight_day', 'flight_number_1', 'flight_number_2', 'from_time', 
@@ -1523,12 +1523,12 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
         na_position='last',
     ).reset_index(drop=True)
 
-    # 时间段过滤 过滤掉异常时间(update_hour 早于 crawl_date, 以及超过8小时不更新的数据)
+    # 时间段过滤 过滤掉异常时间(update_hour 早于 crawl_date, 以及超过12小时不更新的数据)
     update_dt = pd.to_datetime(df_predict["update_hour"], errors="coerce")
     crawl_dt = pd.to_datetime(df_predict["crawl_date"], errors="coerce")
     dt_diff = update_dt - crawl_dt
     df_predict = df_predict.loc[
-        (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=8))
+        (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=12))
         # (dt_diff >= pd.Timedelta(0))
     ].reset_index(drop=True)
     print("更新时间过滤完成")

+ 6 - 4
descending_cabin_task.py

@@ -533,8 +533,10 @@ def _process_one_task(row, runner):
         return None
 
     result = results[0]
-    # adult_price = result.get("adult_price")
-    # adult_tax = result.get("adult_tax")
+    adult_price = result.get("adult_price")
+    adult_tax = result.get("adult_tax")
+    cover_price = round(adult_price * runner.rate)
+    cover_tax = round(adult_tax * runner.rate)
     # adult_total_price = result.get("adult_total_price")
     segments = result.get("segments") or []
     if not segments:
@@ -572,8 +574,8 @@ def _process_one_task(row, runner):
 
     return {
         "trip_type": 1,
-        # "cover_price": adult_price,
-        # "cover_tax": adult_tax,
+        "cover_price": cover_price,
+        "cover_tax": cover_tax,
         "bag_amount": pc,
         "bag_weight": kg,
         "max_threshold": max_threshold,

+ 2 - 2
main_pe_0.py

@@ -38,8 +38,8 @@ def start_predict():
         except Exception as e:
             print(f"remove {csv_path} info: {str(e)}")
 
-    # 预测时间范围,满足起飞时间 在8小时后到240小时后
-    pred_hour_begin = hourly_time + timedelta(hours=8)
+    # 预测时间范围,满足起飞时间 在72小时后到240小时后
+    pred_hour_begin = hourly_time + timedelta(hours=72)
     pred_hour_end = hourly_time + timedelta(hours=240)
 
     pred_date_end = pred_hour_end.strftime("%Y-%m-%d")

+ 1 - 1
main_tr_0.py

@@ -50,7 +50,7 @@ def start_train():
     # date_end = datetime.today().strftime("%Y-%m-%d")
     date_end = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
     # date_begin = (datetime.today() - timedelta(days=32)).strftime("%Y-%m-%d")
-    date_begin = "2026-04-09"   # 2026-01-01 2026-03-18 2026-04-05 2026-04-07 2026-04-15
+    date_begin = "2026-03-01"   # 2026-01-01  2026-04-17
 
     print(f"训练时间范围: {date_begin} 到 {date_end}")