|
|
@@ -1373,7 +1373,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
# df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
# df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'n1'
|
|
|
pass
|
|
|
-
|
|
|
+ print("判定过程结束")
|
|
|
df_min_hours = df_min_hours.rename(columns={'seg1_dep_time': 'from_time'})
|
|
|
_pred_dt = pd.to_datetime(str(pred_time_str), format="%Y%m%d%H%M", errors="coerce")
|
|
|
df_min_hours["update_hour"] = _pred_dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
@@ -1403,6 +1403,15 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
na_position='last',
|
|
|
).reset_index(drop=True)
|
|
|
|
|
|
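+ # Time-window filter: keep only rows whose update_hour is 0-8 hours after crawl_date; staler rows (more than 8 hours without an update) may already be sold out, so they are excluded from prediction. Rows with a negative gap or unparseable timestamps are dropped as well.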
|
|
|
+ update_dt = pd.to_datetime(df_predict["update_hour"], errors="coerce")
|
|
|
+ crawl_dt = pd.to_datetime(df_predict["crawl_date"], errors="coerce")
|
|
|
+ dt_diff = update_dt - crawl_dt
|
|
|
+ df_predict = df_predict.loc[
|
|
|
+ (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=8))
|
|
|
+ ].reset_index(drop=True)
|
|
|
+ print("更新时间过滤")
|
|
|
+
|
|
|
csv_path1 = os.path.join(predict_dir, f'future_predictions_{pred_time_str}.csv')
|
|
|
df_predict.to_csv(csv_path1, mode='a', index=False, header=not os.path.exists(csv_path1), encoding='utf-8-sig')
|
|
|
|