Broncode bekijken

将预测边界改回到起飞前12小时

node04 7 uur geleden
bovenliggende
commit
2dc8d7020e
4 gewijzigde bestanden met 18 toevoegingen en 18 verwijderingen
  1. 8 8
      data_preprocess.py
  2. 2 2
      evaluate_validate_pnl.py
  3. 2 2
      main_pe_0.py
  4. 6 6
      result_validate_0.py

+ 8 - 8
data_preprocess.py

@@ -902,7 +902,7 @@ def preprocess_data_simple(df_input, is_train=False):
 
     # 训练过程
     if is_train:
-        df_target = df_input[(df_input['hours_until_departure'] >= 4) & (df_input['hours_until_departure'] <= 60)].copy()
+        df_target = df_input[(df_input['hours_until_departure'] >= 12) & (df_input['hours_until_departure'] <= 60)].copy()
         df_target = df_target.sort_values(
             by=['gid', 'hours_until_departure'],
             ascending=[True, False]
@@ -1016,7 +1016,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
     ).reset_index(drop=True)
 
     df_sorted = df_sorted[
-        df_sorted['hours_until_departure'].between(4, 60)
+        df_sorted['hours_until_departure'].between(12, 60)
     ].reset_index(drop=True)
 
     # 每个 gid 取 hours_until_departure 最小的一条
@@ -1025,9 +1025,9 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
         .reset_index(drop=True)
     )
 
-    # 确保 hours_until_departure 在 [4, 60] 的 范围内
+    # 确保 hours_until_departure 在 [12, 60] 的 范围内
     # df_min_hours = df_min_hours[
-    #     df_min_hours['hours_until_departure'].between(4, 60)
+    #     df_min_hours['hours_until_departure'].between(12, 60)
     # ].reset_index(drop=True)
 
     drop_info_csv_path = os.path.join(output_dir, f'{group_route_str}_drop_info.csv')
@@ -1120,11 +1120,11 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                         df_match_chk = df_match.copy()
                         dur_vals = pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce')
                         df_match_chk = df_match_chk.loc[dur_vals.notna()].copy()
-                        df_match_chk = df_match_chk.loc[(dur_vals.loc[dur_vals.notna()] - float(dur_base)).abs() <= 20].copy()
+                        df_match_chk = df_match_chk.loc[(dur_vals.loc[dur_vals.notna()] - float(dur_base)).abs() <= 24].copy()
 
                         drop_hud_vals = pd.to_numeric(df_match_chk['drop_hours_until_departure'], errors='coerce')
                         df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
-                        df_match_chk = df_match_chk.loc[(drop_hud_vals.loc[drop_hud_vals.notna()] - float(hud_base)).abs() <= 20].copy()
+                        df_match_chk = df_match_chk.loc[(drop_hud_vals.loc[drop_hud_vals.notna()] - float(hud_base)).abs() <= 12].copy()
 
                         # seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
                         # df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
@@ -1306,7 +1306,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                         df_match_chk_1 = df_match_1.copy()
                         dur_vals_1 = pd.to_numeric(df_match_chk_1['modify_keep_price_duration_hours'], errors='coerce')
                         df_match_chk_1 = df_match_chk_1.loc[dur_vals_1.notna()].copy()
-                        df_match_chk_1 = df_match_chk_1.loc[(dur_vals_1.loc[dur_vals_1.notna()] - float(dur_base_1)).abs() <= 20].copy()
+                        df_match_chk_1 = df_match_chk_1.loc[(dur_vals_1.loc[dur_vals_1.notna()] - float(dur_base_1)).abs() <= 24].copy()
 
                         # drop_hud_vals_1 = pd.to_numeric(df_match_chk_1['keep_hours_until_departure'], errors='coerce')
                         # df_match_chk_1 = df_match_chk_1.loc[drop_hud_vals_1.notna()].copy()
@@ -1398,7 +1398,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
     df_min_hours["update_hour"] = _pred_dt.strftime("%Y-%m-%d %H:%M:%S")
     _dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
     df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(60, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
-    df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(4, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
+    df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(12, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
 
     order_cols = ['city_pair', 'flight_day', 'flight_number_1', 'flight_number_2', 'from_time', 
                   'baggage', 'seats_remaining', 'currency',

+ 2 - 2
evaluate_validate_pnl.py

@@ -116,8 +116,8 @@ if __name__ == "__main__":
     # if len(sys.argv) == 1:
     #     sys.argv = [
     #         sys.argv[0],
-    #         # "/home/node04/yuzhou/jiangcang_vj/validate/node0205_zong/result_validate_node0205_zong_20260210134848.csv",  # 替换为实际路径
-    #         "/home/node04/yuzhou/jiangcang_vj/validate/node0209_zong/result_validate_node0209_zong_20260210140926.csv",  # 替换为实际路径
+    #         # "/home/node04/yuzhou/jiangcang_vj/validate/node0205_zong/result_validate_node0205_zong_20260211100622.csv",  # 替换为实际路径
+    #         "/home/node04/yuzhou/jiangcang_vj/validate/node0210_zong/result_validate_node0210_zong_20260211101300.csv",  # 替换为实际路径
     #         # "--output", "debug_output.csv"
     #     ]
 

+ 2 - 2
main_pe_0.py

@@ -38,8 +38,8 @@ def start_predict():
         except Exception as e:
             print(f"remove {csv_path} info: {str(e)}")
 
-    # 预测时间范围,满足起飞时间 在4小时后到60小时后
-    pred_hour_begin = hourly_time + timedelta(hours=4)
+    # 预测时间范围,满足起飞时间 在12小时后到60小时后
+    pred_hour_begin = hourly_time + timedelta(hours=12)
     pred_hour_end = hourly_time + timedelta(hours=60)
 
     pred_date_end = pred_hour_end.strftime("%Y-%m-%d")

+ 6 - 6
result_validate_0.py

@@ -545,13 +545,13 @@ def validate_process_zong(node, enable_min_max_batch_flag=False, min_batch_time_
     )
 
     df_predict_will_drop_filter_1 = df_predict_will_drop_filter[
-            (df_predict_will_drop_filter["valid_end_dt"] + pd.Timedelta(hours=0))
+            (df_predict_will_drop_filter["valid_end_dt"] + pd.Timedelta(hours=8))
         <= hourly_time
     ].copy()
     df_predict_will_drop_filter_1.drop(columns=["valid_end_dt"], inplace=True)
     after_rows = len(df_predict_will_drop_filter_1)
     print(
-        f"valid_end_hour(+0h)过滤完成: {before_rows} -> {after_rows} (hourly_time={hourly_time_str})"
+        f"valid_end_hour(+8h)过滤完成: {before_rows} -> {after_rows} (hourly_time={hourly_time_str})"
     )
     
     # 开始验证
@@ -585,10 +585,10 @@ if __name__ == "__main__":
         # validate_process_zong(node, True, None, "202602051400")   # 有条件汇总
         # node = "node0203"
         # validate_process_zong(node, True, "202602041100", "202602051400")  # 有条件汇总
-        node = "node0205"
-        validate_process_zong(node, True, "202602061000", "202602091000")  # 有条件汇总
-        # node = "node0210"
-        # validate_process_zong(node, True, "202602101500", None)  # 有条件汇总
+        # node = "node0205"
+        # validate_process_zong(node, True, "202602061000", "202602091000")  # 有条件汇总
+        node = "node0211"
+        validate_process_zong(node, True, "202602111100", None)  # 有条件汇总
     # 1 自动验证
     else:
         node = "node0127"