Explorar el Código

调整uo的预测判定和vj一致

node04 hace 2 semanas
padre
commit
155c6b6f38
Se han modificado 2 ficheros con 22 adiciones y 12 borrados
  1. data_loader.py (+2 -2)
  2. data_process.py (+20 -10)

+ 2 - 2
data_loader.py

@@ -655,8 +655,8 @@ if __name__ == "__main__":
     output_dir = f"./photo"
     os.makedirs(output_dir, exist_ok=True)
 
-    from_date_begin = "2026-03-17"
-    from_date_end = "2026-03-26"
+    from_date_begin = "2026-04-01"
+    from_date_end = "2026-04-20"
 
     uo_city_pairs = uo_city_pairs_new.copy()
 

+ 20 - 10
data_process.py

@@ -199,6 +199,8 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
         .reset_index(drop=True)
     )
 
+    df_min_hours = df_min_hours[(df_min_hours['ticket_amount'] >= 2)].reset_index(drop=True)
+
     # 读历史降价场景
     drop_info_csv_path = os.path.join(object_dir, f'{city_pair}_drop_info.csv')
     if os.path.exists(drop_info_csv_path):
@@ -341,15 +343,19 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
                 # 历史上出现的极近似的增长(下降)幅度后的降价场景
                 if not df_match.empty:
                     dur_base = pd.to_numeric(price_duration_hours, errors='coerce')
-                    # hud_base = pd.to_numeric(hours_until_departure, errors='coerce')
+                    hud_base = pd.to_numeric(hours_until_departure, errors='coerce')
                     dtd_base = pd.to_numeric(days_to_departure, errors='coerce')
 
-                    if pd.notna(dur_base) and pd.notna(dtd_base): 
+                    if pd.notna(dur_base) and pd.notna(dtd_base) and pd.notna(hud_base): 
                         df_match_chk = df_match.copy()
 
-                        drop_dtd_vals = pd.to_numeric(df_match_chk['drop_days_to_departure'], errors='coerce')
-                        df_match_chk = df_match_chk.loc[drop_dtd_vals.notna()].copy()
-                        df_match_chk = df_match_chk.loc[(drop_dtd_vals.loc[drop_dtd_vals.notna()] - float(dtd_base)).abs() <= 3].copy()
+                        # drop_dtd_vals = pd.to_numeric(df_match_chk['drop_days_to_departure'], errors='coerce')
+                        # df_match_chk = df_match_chk.loc[drop_dtd_vals.notna()].copy()
+                        # df_match_chk = df_match_chk.loc[(drop_dtd_vals.loc[drop_dtd_vals.notna()] - float(dtd_base)).abs() <= 3].copy()
+
+                        drop_hud_vals = pd.to_numeric(df_match_chk['drop_hours_until_departure'], errors='coerce')
+                        df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
+                        df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
 
                         # 距离起飞天数也对的上
                         if not df_match_chk.empty:
@@ -415,15 +421,19 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
                 # 历史上出现的极近似的增长(下降)幅度后的升价场景
                 if not df_match_1.empty:
                     dur_base_1 = pd.to_numeric(price_duration_hours, errors='coerce')
-                    # hud_base_1 = pd.to_numeric(hours_until_departure, errors='coerce')
+                    hud_base_1 = pd.to_numeric(hours_until_departure, errors='coerce')
                     dtd_base_1 = pd.to_numeric(days_to_departure, errors='coerce')
 
-                    if pd.notna(dur_base_1) and pd.notna(dtd_base_1): 
+                    if pd.notna(dur_base_1) and pd.notna(dtd_base_1) and pd.notna(hud_base_1): 
                         df_match_chk_1 = df_match_1.copy()
                         
-                        drop_dtd_vals_1 = pd.to_numeric(df_match_chk_1['rise_days_to_departure'], errors='coerce')
-                        df_match_chk_1 = df_match_chk_1.loc[drop_dtd_vals_1.notna()].copy()
-                        df_match_chk_1 = df_match_chk_1.loc[(drop_dtd_vals_1.loc[drop_dtd_vals_1.notna()] - float(dtd_base_1)).abs() <= 3].copy()
+                        # drop_dtd_vals_1 = pd.to_numeric(df_match_chk_1['rise_days_to_departure'], errors='coerce')
+                        # df_match_chk_1 = df_match_chk_1.loc[drop_dtd_vals_1.notna()].copy()
+                        # df_match_chk_1 = df_match_chk_1.loc[(drop_dtd_vals_1.loc[drop_dtd_vals_1.notna()] - float(dtd_base_1)).abs() <= 3].copy()
+
+                        rise_hud_vals_1 = pd.to_numeric(df_match_chk_1['rise_hours_until_departure'], errors='coerce')
+                        df_match_chk_1 = df_match_chk_1.loc[rise_hud_vals_1.notna()].copy()
+                        df_match_chk_1 = df_match_chk_1.loc[(float(hud_base_1) - rise_hud_vals_1.loc[rise_hud_vals_1.notna()]) >= -24].copy()
 
                         # 距离起飞天数也对的上
                         if not df_match_chk_1.empty: