|
|
@@ -122,10 +122,25 @@ def follow_up_handle():
|
|
|
df_keep_info = df_last_predict_will_drop.copy()
|
|
|
df_keep_info["keep_flag"] = 1
|
|
|
# df_keep_info["last_predict_time"] = target_time
|
|
|
- df_keep_info.to_csv(keep_info_path, index=False, encoding="utf-8-sig")
|
|
|
- print(f"维护表已初始化: {keep_info_path} (rows={len(df_keep_info)})")
|
|
|
+
|
|
|
+ # 将长时间没更新的航班标记为-1
|
|
|
+ dt_update_hour = pd.to_datetime(df_keep_info["update_hour"], errors="coerce")
|
|
|
+ dt_crawl_date = pd.to_datetime(df_keep_info["crawl_date"], errors="coerce")
|
|
|
+ mask_abnormal_time = (dt_update_hour - dt_crawl_date) > pd.Timedelta(hours=12)
|
|
|
+ if mask_abnormal_time.any():
|
|
|
+ df_keep_info.loc[mask_abnormal_time.fillna(False), "keep_flag"] = -1
|
|
|
+
|
|
|
df_keep_info.to_csv(keep_info_snapshot_path, index=False, encoding="utf-8-sig")
|
|
|
print(f"维护表快照已保存: {keep_info_snapshot_path} (rows={len(df_keep_info)})")
|
|
|
+
|
|
|
+ # 移除 keep_flag 为 -1 的行
|
|
|
+ # before_rm = len(df_keep_info)
|
|
|
+ df_keep_info = df_keep_info.loc[df_keep_info["keep_flag"] != -1].reset_index(drop=True)
|
|
|
+ # rm_rows = before_rm - len(df_keep_info)
|
|
|
+
|
|
|
+ df_keep_info.to_csv(keep_info_path, index=False, encoding="utf-8-sig")
|
|
|
+ print(f"维护表已初始化: {keep_info_path} (rows={len(df_keep_info)})")
|
|
|
+
|
|
|
# 已存在维护表
|
|
|
else:
|
|
|
if "keep_flag" not in df_keep_info.columns:
|