Quellcode durchsuchen

调整初始化维护表时的一些策略

node04 vor 2 Wochen
Ursprung
Commit
4f9d864de0
1 geänderte Datei mit 17 neuen und 2 gelöschten Zeilen
  1. 17 2
      follow_up.py

+ 17 - 2
follow_up.py

@@ -122,10 +122,25 @@ def follow_up_handle():
         df_keep_info = df_last_predict_will_drop.copy()
         df_keep_info["keep_flag"] = 1
         # df_keep_info["last_predict_time"] = target_time
-        df_keep_info.to_csv(keep_info_path, index=False, encoding="utf-8-sig")
-        print(f"维护表已初始化: {keep_info_path} (rows={len(df_keep_info)})")
+
+        # 将长时间没更新的航班标记为-1
+        dt_update_hour = pd.to_datetime(df_keep_info["update_hour"], errors="coerce")
+        dt_crawl_date = pd.to_datetime(df_keep_info["crawl_date"], errors="coerce")
+        mask_abnormal_time = (dt_update_hour - dt_crawl_date) > pd.Timedelta(hours=12)
+        if mask_abnormal_time.any():
+            df_keep_info.loc[mask_abnormal_time.fillna(False), "keep_flag"] = -1
+        
         df_keep_info.to_csv(keep_info_snapshot_path, index=False, encoding="utf-8-sig")
         print(f"维护表快照已保存: {keep_info_snapshot_path} (rows={len(df_keep_info)})")
+
+        # 移除 keep_flag 为 -1 的行
+        # before_rm = len(df_keep_info)
+        df_keep_info = df_keep_info.loc[df_keep_info["keep_flag"] != -1].reset_index(drop=True)
+        # rm_rows = before_rm - len(df_keep_info)
+
+        df_keep_info.to_csv(keep_info_path, index=False, encoding="utf-8-sig")
+        print(f"维护表已初始化: {keep_info_path} (rows={len(df_keep_info)})")
+        
     # 已存在维护表
     else:
         if "keep_flag" not in df_keep_info.columns: