|
@@ -902,7 +902,7 @@ def preprocess_data_simple(df_input, is_train=False):
|
|
|
|
|
|
|
|
# 训练过程
|
|
# 训练过程
|
|
|
if is_train:
|
|
if is_train:
|
|
|
- df_target = df_input[(df_input['hours_until_departure'] >= 4) & (df_input['hours_until_departure'] <= 60)].copy()
|
|
|
|
|
|
|
+ df_target = df_input[(df_input['hours_until_departure'] >= 12) & (df_input['hours_until_departure'] <= 60)].copy()
|
|
|
df_target = df_target.sort_values(
|
|
df_target = df_target.sort_values(
|
|
|
by=['gid', 'hours_until_departure'],
|
|
by=['gid', 'hours_until_departure'],
|
|
|
ascending=[True, False]
|
|
ascending=[True, False]
|
|
@@ -1016,7 +1016,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
).reset_index(drop=True)
|
|
).reset_index(drop=True)
|
|
|
|
|
|
|
|
df_sorted = df_sorted[
|
|
df_sorted = df_sorted[
|
|
|
- df_sorted['hours_until_departure'].between(4, 60)
|
|
|
|
|
|
|
+ df_sorted['hours_until_departure'].between(12, 60)
|
|
|
].reset_index(drop=True)
|
|
].reset_index(drop=True)
|
|
|
|
|
|
|
|
# 每个 gid 取 hours_until_departure 最小的一条
|
|
# 每个 gid 取 hours_until_departure 最小的一条
|
|
@@ -1025,9 +1025,9 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
.reset_index(drop=True)
|
|
.reset_index(drop=True)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- # 确保 hours_until_departure 在 [4, 60] 的 范围内
|
|
|
|
|
|
|
+ # 确保 hours_until_departure 在 [12, 60] 的 范围内
|
|
|
# df_min_hours = df_min_hours[
|
|
# df_min_hours = df_min_hours[
|
|
|
- # df_min_hours['hours_until_departure'].between(4, 60)
|
|
|
|
|
|
|
+ # df_min_hours['hours_until_departure'].between(12, 60)
|
|
|
# ].reset_index(drop=True)
|
|
# ].reset_index(drop=True)
|
|
|
|
|
|
|
|
drop_info_csv_path = os.path.join(output_dir, f'{group_route_str}_drop_info.csv')
|
|
drop_info_csv_path = os.path.join(output_dir, f'{group_route_str}_drop_info.csv')
|
|
@@ -1120,11 +1120,11 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
df_match_chk = df_match.copy()
|
|
df_match_chk = df_match.copy()
|
|
|
dur_vals = pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce')
|
|
dur_vals = pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce')
|
|
|
df_match_chk = df_match_chk.loc[dur_vals.notna()].copy()
|
|
df_match_chk = df_match_chk.loc[dur_vals.notna()].copy()
|
|
|
- df_match_chk = df_match_chk.loc[(dur_vals.loc[dur_vals.notna()] - float(dur_base)).abs() <= 20].copy()
|
|
|
|
|
|
|
+ df_match_chk = df_match_chk.loc[(dur_vals.loc[dur_vals.notna()] - float(dur_base)).abs() <= 24].copy()
|
|
|
|
|
|
|
|
drop_hud_vals = pd.to_numeric(df_match_chk['drop_hours_until_departure'], errors='coerce')
|
|
drop_hud_vals = pd.to_numeric(df_match_chk['drop_hours_until_departure'], errors='coerce')
|
|
|
df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
|
|
df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
|
|
|
- df_match_chk = df_match_chk.loc[(drop_hud_vals.loc[drop_hud_vals.notna()] - float(hud_base)).abs() <= 20].copy()
|
|
|
|
|
|
|
+ df_match_chk = df_match_chk.loc[(drop_hud_vals.loc[drop_hud_vals.notna()] - float(hud_base)).abs() <= 12].copy()
|
|
|
|
|
|
|
|
# seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
|
|
# seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
|
|
|
# df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
|
|
# df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
|
|
@@ -1306,7 +1306,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
df_match_chk_1 = df_match_1.copy()
|
|
df_match_chk_1 = df_match_1.copy()
|
|
|
dur_vals_1 = pd.to_numeric(df_match_chk_1['modify_keep_price_duration_hours'], errors='coerce')
|
|
dur_vals_1 = pd.to_numeric(df_match_chk_1['modify_keep_price_duration_hours'], errors='coerce')
|
|
|
df_match_chk_1 = df_match_chk_1.loc[dur_vals_1.notna()].copy()
|
|
df_match_chk_1 = df_match_chk_1.loc[dur_vals_1.notna()].copy()
|
|
|
- df_match_chk_1 = df_match_chk_1.loc[(dur_vals_1.loc[dur_vals_1.notna()] - float(dur_base_1)).abs() <= 20].copy()
|
|
|
|
|
|
|
+ df_match_chk_1 = df_match_chk_1.loc[(dur_vals_1.loc[dur_vals_1.notna()] - float(dur_base_1)).abs() <= 24].copy()
|
|
|
|
|
|
|
|
# drop_hud_vals_1 = pd.to_numeric(df_match_chk_1['keep_hours_until_departure'], errors='coerce')
|
|
# drop_hud_vals_1 = pd.to_numeric(df_match_chk_1['keep_hours_until_departure'], errors='coerce')
|
|
|
# df_match_chk_1 = df_match_chk_1.loc[drop_hud_vals_1.notna()].copy()
|
|
# df_match_chk_1 = df_match_chk_1.loc[drop_hud_vals_1.notna()].copy()
|
|
@@ -1398,7 +1398,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
df_min_hours["update_hour"] = _pred_dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
df_min_hours["update_hour"] = _pred_dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
_dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
|
|
_dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
|
|
|
df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(60, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(60, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
- df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(4, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
|
+ df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(12, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
|
|
order_cols = ['city_pair', 'flight_day', 'flight_number_1', 'flight_number_2', 'from_time',
|
|
order_cols = ['city_pair', 'flight_day', 'flight_number_1', 'flight_number_2', 'from_time',
|
|
|
'baggage', 'seats_remaining', 'currency',
|
|
'baggage', 'seats_remaining', 'currency',
|