|
@@ -84,7 +84,7 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
|
|
|
|
|
|
|
|
# 训练过程
|
|
# 训练过程
|
|
|
if is_train:
|
|
if is_train:
|
|
|
- df_target = df_input[(df_input['hours_until_departure'] >= 24) & (df_input['hours_until_departure'] <= 360)].copy()
|
|
|
|
|
|
|
+ df_target = df_input[(df_input['hours_until_departure'] >= 72) & (df_input['hours_until_departure'] <= 360)].copy()
|
|
|
df_target = df_target.sort_values(
|
|
df_target = df_target.sort_values(
|
|
|
by=['gid', 'baggage_weight', 'hours_until_departure'],
|
|
by=['gid', 'baggage_weight', 'hours_until_departure'],
|
|
|
ascending=[True, True, False]
|
|
ascending=[True, True, False]
|
|
@@ -190,7 +190,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
).reset_index(drop=True)
|
|
).reset_index(drop=True)
|
|
|
|
|
|
|
|
df_sorted = df_sorted[
|
|
df_sorted = df_sorted[
|
|
|
- df_sorted['hours_until_departure'].between(24, 360)
|
|
|
|
|
|
|
+ df_sorted['hours_until_departure'].between(72, 360)
|
|
|
].reset_index(drop=True)
|
|
].reset_index(drop=True)
|
|
|
|
|
|
|
|
# 每个 gid baggage_weight 取 hours_until_departure 最小的一条 (当前小时)
|
|
# 每个 gid baggage_weight 取 hours_until_departure 最小的一条 (当前小时)
|
|
@@ -459,12 +459,12 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
|
|
|
|
|
_dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
|
|
_dep_hour = pd.to_datetime(df_min_hours["from_time"], errors="coerce").dt.floor("h")
|
|
|
df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(360, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
df_min_hours["valid_begin_hour"] = (_dep_hour - pd.to_timedelta(360, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
- df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(24, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
|
+ df_min_hours["valid_end_hour"] = (_dep_hour - pd.to_timedelta(72, unit="h")).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
|
|
# 要展示在预测表里的字段
|
|
# 要展示在预测表里的字段
|
|
|
order_cols = [
|
|
order_cols = [
|
|
|
"citypair", "flight_numbers", "baggage_weight", "from_date", "from_time",
|
|
"citypair", "flight_numbers", "baggage_weight", "from_date", "from_time",
|
|
|
- "cabins", "ticket_amount", "currency",
|
|
|
|
|
|
|
+ "cabins", "ticket_amount", "currency", "price_base", "price_tax",
|
|
|
"price_total", 'relative_position', 'days_to_departure', 'hours_until_departure',
|
|
"price_total", 'relative_position', 'days_to_departure', 'hours_until_departure',
|
|
|
'price_change_amount', 'price_change_percent', 'price_duration_hours',
|
|
'price_change_amount', 'price_change_percent', 'price_duration_hours',
|
|
|
"update_hour", "create_time",
|
|
"update_hour", "create_time",
|