瀏覽代碼

再次调整判定细节

node04 3 周之前
父節點
當前提交
8eb2b0f833
共有 2 個文件被更改,包括 8 次插入和 8 次删除
  1. 7 7
      data_preprocess.py
  2. 1 1
      descending_cabin_task.py

+ 7 - 7
data_preprocess.py

@@ -1268,9 +1268,9 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                         # df_match_chk = df_match_chk.loc[dur_vals.notna()].copy()
                         # df_match_chk = df_match_chk.loc[(dur_vals.loc[dur_vals.notna()] - float(dur_base)).abs() <= 36].copy()
 
-                        # drop_hud_vals = pd.to_numeric(df_match_chk['drop_hours_until_departure'], errors='coerce')
-                        # df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
-                        # df_match_chk = df_match_chk.loc[(drop_hud_vals.loc[drop_hud_vals.notna()] - float(hud_base)).abs() <= 24].copy()
+                        drop_hud_vals = pd.to_numeric(df_match_chk['drop_hours_until_departure'], errors='coerce')
+                        df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
+                        df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
 
                         # seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
                         # df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
@@ -1371,9 +1371,9 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                         # df_match_chk_1 = df_match_chk_1.loc[dur_vals_1.notna()].copy()
                         # df_match_chk_1 = df_match_chk_1.loc[(dur_vals_1.loc[dur_vals_1.notna()] - float(dur_base_1)).abs() <= 24].copy()
 
-                        # rise_hud_vals_1 = pd.to_numeric(df_match_chk_1['rise_hours_until_departure'], errors='coerce')
-                        # df_match_chk_1 = df_match_chk_1.loc[rise_hud_vals_1.notna()].copy()
-                        # df_match_chk_1 = df_match_chk_1.loc[(rise_hud_vals_1.loc[rise_hud_vals_1.notna()] - float(hud_base_1)).abs() <= 24].copy()
+                        rise_hud_vals_1 = pd.to_numeric(df_match_chk_1['rise_hours_until_departure'], errors='coerce')
+                        df_match_chk_1 = df_match_chk_1.loc[rise_hud_vals_1.notna()].copy()
+                        df_match_chk_1 = df_match_chk_1.loc[(float(hud_base_1) - rise_hud_vals_1.loc[rise_hud_vals_1.notna()]) >= -24].copy()
 
                         # seats_vals_1 = pd.to_numeric(df_match_chk_1['rise_seats_remaining_change_amount'], errors='coerce')
                         # df_match_chk_1 = df_match_chk_1.loc[seats_vals_1.notna()].copy()
@@ -1404,7 +1404,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                             else:
                                 drop_prob = round(length_drop / (length_rise + length_drop), 2)
                                 # 依旧保持之前的降价判定,概率修改
-                                if drop_prob > 0.6:
+                                if drop_prob >= 0.7:
                                     df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
                                     # df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'd1'
                                     df_min_hours.loc[idx, 'flag_dist'] = 'd1'

+ 1 - 1
descending_cabin_task.py

@@ -517,7 +517,7 @@ def _process_one_task(row, runner):
         return None
     
     drop_price_sample_size = int(task.get("drop_price_sample_size", "0"))
-    if drop_price_sample_size < 3:  # 丢弃历史降价样本数过少(小于3)的
+    if drop_price_sample_size < 2:  # 丢弃历史降价样本数过少(小于2)的
         return None
     
     flight_day = task.get("flight_day")