3 недель назад · 808a682634
--- a/data_preprocess.py
+++ b/data_preprocess.py
@@ -926,7 +926,7 @@ def preprocess_data_simple(df_input, is_train=False):
 
															     # 训练过程
														
 
															     if is_train:
														
 
															-        df_target = df_input[(df_input['hours_until_departure'] >= 8) & (df_input['hours_until_departure'] <= 240)].copy()   # 扩展至240小时（10天） 
														
 
															+        df_target = df_input[(df_input['hours_until_departure'] >= 72) & (df_input['hours_until_departure'] <= 240)].copy()   # 扩展至240小时（10天） 
														
 
															         df_target = df_target.sort_values(
														
 
															             by=['gid', 'hours_until_departure'],
														
 
															             ascending=[True, False]
														
@@ -1073,7 +1073,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
 
															     ).reset_index(drop=True)
														
 
															     df_sorted = df_sorted[
														
 
															-        df_sorted['hours_until_departure'].between(8, 240)
														
 
															+        df_sorted['hours_until_departure'].between(72, 240)
														
 
															     ].reset_index(drop=True)
														
 
															     # 每个 gid 取 hours_until_departure 最小的一条
														
@@ -1082,9 +1082,9 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
 
															         .reset_index(drop=True)
														
 
															     )
														
 
															-    # 确保 hours_until_departure 在 [8, 240] 的 范围内
														
 
															+    # 确保 hours_until_departure 在 [72, 240] 的 范围内
														
 
															     # df_min_hours = df_min_hours[
														
 
															-    #     df_min_hours['hours_until_departure'].between(8, 240)
														
 
															+    #     df_min_hours['hours_until_departure'].between(72, 240)
														
 
															     # ].reset_index(drop=True)
														
 
															     drop_info_csv_path = os.path.join(output_dir, f'{group_route_str}_drop_info.csv')
														
@@ -1193,14 +1193,14 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
 
															     # ==================== 综合评分：包络高位 × 降价潜力 ====================
														
 
															     # target_score = 包络位置（越高越好）× 降价潜力（越高越好）
														
 
															-    thres_ep = 0.7
														
 
															-    thres_dp = 0.3
														
 
															+    thres_ep = 0.6
														
 
															+    thres_dp = 0.4
														
 
															     df_min_hours['target_score'] = (
														
 
															         df_min_hours['envelope_position'] * thres_ep + df_min_hours['drop_potential'] * thres_dp
														
 
															     ).round(4)
														
 
															     # 综合评分阈值：大于阈值的都认为值得投放
														
 
															-    target_score_threshold = 0.7
														
 
															+    target_score_threshold = 0.75
														
 
															     # df_min_hours['target_score_threshold'] = target_score_threshold
														
 
															     df_min_hours['is_good_target'] = (df_min_hours['target_score'] >= target_score_threshold).astype(int)
														
@@ -1489,7 +1489,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
 
															     df_min_hours["update_hour"] = _pred_dt.strftime("%Y-%m-%d %H:%M:%S")
														
 
															     _dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
														
 
															     df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(240, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
														
 
															-    df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(8, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
														
 
															+    df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(72, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
														
 
															     # 要展示在预测表里的字段
														
 
															     order_cols = ['city_pair', 'flight_day', 'flight_number_1', 'flight_number_2', 'from_time', 
														
@@ -1523,12 +1523,12 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
 
															         na_position='last',
														
 
															     ).reset_index(drop=True)
														
 
															-    # 时间段过滤 过滤掉异常时间（update_hour 早于 crawl_date, 以及超过8小时不更新的数据）
														
 
															+    # 时间段过滤 过滤掉异常时间（update_hour 早于 crawl_date, 以及超过12小时不更新的数据）
														
 
															     update_dt = pd.to_datetime(df_predict["update_hour"], errors="coerce")
														
 
															     crawl_dt = pd.to_datetime(df_predict["crawl_date"], errors="coerce")
														
 
															     dt_diff = update_dt - crawl_dt
														
 
															     df_predict = df_predict.loc[
														
 
															-        (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=8))
														
 
															+        (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=12))
														
 
															         # (dt_diff >= pd.Timedelta(0))
														
 
															     ].reset_index(drop=True)
														
 
															     print("更新时间过滤完成")
														
--- a/descending_cabin_task.py
+++ b/descending_cabin_task.py
@@ -533,8 +533,10 @@ def _process_one_task(row, runner):
 
															         return None
														
 
															     result = results[0]
														
 
															-    # adult_price = result.get("adult_price")
														
 
															-    # adult_tax = result.get("adult_tax")
														
 
															+    adult_price = result.get("adult_price")
														
 
															+    adult_tax = result.get("adult_tax")
														
 
															+    cover_price = round(adult_price * runner.rate)
														
 
															+    cover_tax = round(adult_tax * runner.rate)
														
 
															     # adult_total_price = result.get("adult_total_price")
														
 
															     segments = result.get("segments") or []
														
 
															     if not segments:
														
@@ -572,8 +574,8 @@ def _process_one_task(row, runner):
 
															     return {
														
 
															         "trip_type": 1,
														
 
															-        # "cover_price": adult_price,
														
 
															-        # "cover_tax": adult_tax,
														
 
															+        "cover_price": cover_price,
														
 
															+        "cover_tax": cover_tax,
														
 
															         "bag_amount": pc,
														
 
															         "bag_weight": kg,
														
 
															         "max_threshold": max_threshold,
														
--- a/main_pe_0.py
+++ b/main_pe_0.py
@@ -38,8 +38,8 @@ def start_predict():
 
															         except Exception as e:
														
 
															             print(f"remove {csv_path} info: {str(e)}")
														
 
															-    # 预测时间范围，满足起飞时间 在8小时后到240小时后
														
 
															-    pred_hour_begin = hourly_time + timedelta(hours=8)
														
 
															+    # 预测时间范围，满足起飞时间 在72小时后到240小时后
														
 
															+    pred_hour_begin = hourly_time + timedelta(hours=72)
														
 
															     pred_hour_end = hourly_time + timedelta(hours=240)
														
 
															     pred_date_end = pred_hour_end.strftime("%Y-%m-%d")
														
--- a/main_tr_0.py
+++ b/main_tr_0.py
@@ -50,7 +50,7 @@ def start_train():
 
															     # date_end = datetime.today().strftime("%Y-%m-%d")
														
 
															     date_end = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
														
 
															     # date_begin = (datetime.today() - timedelta(days=32)).strftime("%Y-%m-%d")
														
 
															-    date_begin = "2026-04-09"   # 2026-01-01 2026-03-18 2026-04-05 2026-04-07 2026-04-15
														
 
															+    date_begin = "2026-03-01"   # 2026-01-01  2026-04-17
														
 
															     print(f"训练时间范围: {date_begin} 到 {date_end}")