|
@@ -87,7 +87,7 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
|
|
|
|
|
|
|
|
# Training path (executed only when is_train is true)
|
|
# Training path (executed only when is_train is true)
|
|
|
if is_train:
|
|
if is_train:
|
|
|
- df_target = df_input[(df_input['hours_until_departure'] >= 72) & (df_input['hours_until_departure'] <= 360)].copy()
|
|
|
|
|
|
|
+ df_target = df_input[(df_input['hours_until_departure'] >= 48) & (df_input['hours_until_departure'] <= 384)].copy()
|
|
|
df_target = df_target.sort_values(
|
|
df_target = df_target.sort_values(
|
|
|
by=['gid', 'baggage_weight', 'hours_until_departure'],
|
|
by=['gid', 'baggage_weight', 'hours_until_departure'],
|
|
|
ascending=[True, True, False]
|
|
ascending=[True, True, False]
|
|
@@ -140,6 +140,9 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
|
|
|
]
|
|
]
|
|
|
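# Select columns in a fixed order (flight-info columns first, then drop-info columns); gid is kept
|
|
# Select columns in a fixed order (flight-info columns first, then drop-info columns); gid is kept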
|
|
|
df_drop_nodes = df_drop_nodes[flight_info_cols + drop_info_cols]
|
|
df_drop_nodes = df_drop_nodes[flight_info_cols + drop_info_cols]
|
|
|
|
|
+ df_drop_nodes['start_hours_until_departure'] = (df_drop_nodes['drop_hours_until_departure'] + df_drop_nodes['high_price_duration_hours']).round().astype('Int64')
|
|
|
|
|
+ df_drop_nodes = df_drop_nodes[df_drop_nodes['drop_hours_until_departure'] <= 360]
|
|
|
|
|
+ df_drop_nodes = df_drop_nodes[df_drop_nodes['start_hours_until_departure'] >= 72]
|
|
|
|
|
|
|
|
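# Negative-example pool: all valid nodes (not restricted to price rises) for which no price drop occurred within the following 24 hours
|
|
# Negative-example pool: all valid nodes (not restricted to price rises) for which no price drop occurred within the following 24 hours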
|
|
|
# seg_start_mask = df_target['price_duration_hours'].eq(1)
|
|
# seg_start_mask = df_target['price_duration_hours'].eq(1)
|
|
@@ -176,6 +179,9 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
|
|
|
'prev_rise_duration_hours', 'prev_rise_change_percent', 'prev_rise_change_amount', 'prev_rise_amount', 'prev_rise_cabins'
|
|
'prev_rise_duration_hours', 'prev_rise_change_percent', 'prev_rise_change_amount', 'prev_rise_amount', 'prev_rise_cabins'
|
|
|
]
|
|
]
|
|
|
df_rise_nodes = df_rise_nodes[flight_info_cols + rise_info_cols]
|
|
df_rise_nodes = df_rise_nodes[flight_info_cols + rise_info_cols]
|
|
|
|
|
+ df_rise_nodes['start_hours_until_departure'] = (df_rise_nodes['rise_hours_until_departure'] + df_rise_nodes['prev_rise_duration_hours']).round().astype('Int64')
|
|
|
|
|
+ df_rise_nodes = df_rise_nodes[df_rise_nodes['rise_hours_until_departure'] <= 360]
|
|
|
|
|
+ df_rise_nodes = df_rise_nodes[df_rise_nodes['start_hours_until_departure'] >= 72]
|
|
|
|
|
|
|
|
# Build the historical envelope
|
|
# Build the historical envelope
|
|
|
envelope_group = ['citypair', 'flight_numbers', 'from_date', 'baggage_weight']
|
|
envelope_group = ['citypair', 'flight_numbers', 'from_date', 'baggage_weight']
|
|
@@ -348,7 +354,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
# df_min_hours['is_target_day'] = (df_min_hours['from_date'] == df_min_hours['target_flight_day']).astype(int)
|
|
# df_min_hours['is_target_day'] = (df_min_hours['from_date'] == df_min_hours['target_flight_day']).astype(int)
|
|
|
|
|
|
|
|
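# Composite-score threshold: rows at or above the threshold (the comparison below uses >=) are considered worth targeting
|
|
# Composite-score threshold: rows at or above the threshold (the comparison below uses >=) are considered worth targeting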
|
|
|
- relative_position_threshold = 0.5
|
|
|
|
|
|
|
+ relative_position_threshold = 0.4
|
|
|
df_min_hours['is_good_target'] = (df_min_hours['relative_position'] >= relative_position_threshold).astype(int)
|
|
df_min_hours['is_good_target'] = (df_min_hours['relative_position'] >= relative_position_threshold).astype(int)
|
|
|
total_cnt_before = len(df_min_hours) # 记录下过滤前的总数
|
|
total_cnt_before = len(df_min_hours) # 记录下过滤前的总数
|
|
|
df_min_hours = df_min_hours[(df_min_hours['is_good_target'] == 1)].reset_index(drop=True) # 保留值得投放的
|
|
df_min_hours = df_min_hours[(df_min_hours['is_good_target'] == 1)].reset_index(drop=True) # 保留值得投放的
|