|
@@ -1173,7 +1173,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
df_min_hours = df_min_hours[(df_min_hours['seats_remaining'] >= 5)].reset_index(drop=True)
|
|
df_min_hours = df_min_hours[(df_min_hours['seats_remaining'] >= 5)].reset_index(drop=True)
|
|
|
|
|
|
|
|
df_min_hours['simple_will_price_drop'] = 0
|
|
df_min_hours['simple_will_price_drop'] = 0
|
|
|
- df_min_hours['simple_drop_in_hours'] = 0
|
|
|
|
|
|
|
+ # df_min_hours['simple_drop_in_hours'] = 0
|
|
|
df_min_hours['simple_drop_in_hours_prob'] = 0.0
|
|
df_min_hours['simple_drop_in_hours_prob'] = 0.0
|
|
|
df_min_hours['simple_drop_in_hours_dist'] = '' # 空串 表示未知
|
|
df_min_hours['simple_drop_in_hours_dist'] = '' # 空串 表示未知
|
|
|
df_min_hours['flag_dist'] = ''
|
|
df_min_hours['flag_dist'] = ''
|
|
@@ -1272,6 +1272,12 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
|
|
df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
|
|
|
df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
|
|
df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
|
|
|
|
|
|
|
|
|
|
+ dur_num_chk = pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce')
|
|
|
|
|
+ dur_delta = dur_num_chk - float(dur_base)
|
|
|
|
|
+ df_match_chk = df_match_chk.assign(dur_delta=dur_delta)
|
|
|
|
|
+ df_match_chk = df_match_chk.loc[df_match_chk['dur_delta'].notna()].copy()
|
|
|
|
|
+ df_match_chk = df_match_chk.loc[df_match_chk['dur_delta'].abs() <= 48].copy()
|
|
|
|
|
+
|
|
|
# seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
|
|
# seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
|
|
|
# df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
|
|
# df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
|
|
|
# df_match_chk = df_match_chk.loc[seats_vals.loc[seats_vals.notna()] == float(seats_base)].copy()
|
|
# df_match_chk = df_match_chk.loc[seats_vals.loc[seats_vals.notna()] == float(seats_base)].copy()
|
|
@@ -1290,23 +1296,26 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
# if len(drop_mode_values) > 0:
|
|
# if len(drop_mode_values) > 0:
|
|
|
# df_min_hours.loc[idx, 'drop_price_change_mode'] = round(float(drop_mode_values[0]), 2)
|
|
# df_min_hours.loc[idx, 'drop_price_change_mode'] = round(float(drop_mode_values[0]), 2)
|
|
|
|
|
|
|
|
- remaining_hours = (
|
|
|
|
|
- pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce') - float(dur_base)
|
|
|
|
|
- ).clip(lower=0)
|
|
|
|
|
- remaining_hours = remaining_hours.round().astype(int)
|
|
|
|
|
|
|
+ # remaining_hours = (
|
|
|
|
|
+ # pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce') - float(dur_base)
|
|
|
|
|
+ # ).clip(lower=0)
|
|
|
|
|
+ # remaining_hours = remaining_hours.round().astype(int)
|
|
|
|
|
|
|
|
- counts = remaining_hours.value_counts().sort_index()
|
|
|
|
|
- probs = (counts / counts.sum()).round(4)
|
|
|
|
|
|
|
+ # counts = remaining_hours.value_counts().sort_index()
|
|
|
|
|
+ # probs = (counts / counts.sum()).round(4)
|
|
|
|
|
|
|
|
- top_hours = int(probs.idxmax())
|
|
|
|
|
- top_prob = float(probs.max())
|
|
|
|
|
|
|
+ # top_hours = int(probs.idxmax())
|
|
|
|
|
+ # top_prob = float(probs.max())
|
|
|
|
|
|
|
|
- dist_items = list(zip(probs.index.tolist(), probs.tolist()))
|
|
|
|
|
- dist_items = dist_items[:10]
|
|
|
|
|
- dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
|
|
|
|
|
|
|
+ # dist_items = list(zip(probs.index.tolist(), probs.tolist()))
|
|
|
|
|
+ # dist_items = dist_items[:10]
|
|
|
|
|
+ # dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
|
|
|
|
|
+
|
|
|
|
|
+ dur_delta_list = df_match_chk['dur_delta'].tolist()
|
|
|
|
|
+ dist_str = "'" + ' '.join([f"{ddl:g}" for ddl in dur_delta_list])
|
|
|
|
|
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
|
- df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
|
|
|
|
|
|
|
+ # df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 1
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 1
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
|
|
|
df_min_hours.loc[idx, 'flag_dist'] = 'd0'
|
|
df_min_hours.loc[idx, 'flag_dist'] = 'd0'
|
|
@@ -1396,7 +1405,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
# 可以明确的判定不降价
|
|
# 可以明确的判定不降价
|
|
|
if length_drop == 0:
|
|
if length_drop == 0:
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
|
- df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
|
|
|
|
+ # df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
# df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'r0'
|
|
# df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'r0'
|
|
|
df_min_hours.loc[idx, 'flag_dist'] = 'r0'
|
|
df_min_hours.loc[idx, 'flag_dist'] = 'r0'
|
|
@@ -1460,7 +1469,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
'adult_total_price', 'days_to_departure', 'hours_until_departure', 'price_change_percent', 'price_change_amount', 'price_duration_hours',
|
|
'adult_total_price', 'days_to_departure', 'hours_until_departure', 'price_change_percent', 'price_change_amount', 'price_duration_hours',
|
|
|
'update_hour', 'crawl_date',
|
|
'update_hour', 'crawl_date',
|
|
|
'valid_begin_hour', 'valid_end_hour',
|
|
'valid_begin_hour', 'valid_end_hour',
|
|
|
- 'simple_will_price_drop', 'simple_drop_in_hours', 'simple_drop_in_hours_prob', 'simple_drop_in_hours_dist',
|
|
|
|
|
|
|
+ 'simple_will_price_drop', 'simple_drop_in_hours_prob', 'simple_drop_in_hours_dist',
|
|
|
'flag_dist',
|
|
'flag_dist',
|
|
|
'drop_price_change_upper', 'drop_price_change_lower', 'drop_price_sample_size',
|
|
'drop_price_change_upper', 'drop_price_change_lower', 'drop_price_sample_size',
|
|
|
'rise_price_change_upper', 'rise_price_change_lower', 'rise_price_sample_size',
|
|
'rise_price_change_upper', 'rise_price_change_lower', 'rise_price_sample_size',
|
|
@@ -1473,7 +1482,6 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
df_predict = df_min_hours[order_cols]
|
|
df_predict = df_min_hours[order_cols]
|
|
|
df_predict = df_predict.rename(columns={
|
|
df_predict = df_predict.rename(columns={
|
|
|
'simple_will_price_drop': 'will_price_drop',
|
|
'simple_will_price_drop': 'will_price_drop',
|
|
|
- 'simple_drop_in_hours': 'drop_in_hours',
|
|
|
|
|
'simple_drop_in_hours_prob': 'drop_in_hours_prob',
|
|
'simple_drop_in_hours_prob': 'drop_in_hours_prob',
|
|
|
'simple_drop_in_hours_dist': 'drop_in_hours_dist',
|
|
'simple_drop_in_hours_dist': 'drop_in_hours_dist',
|
|
|
}
|
|
}
|