|
|
@@ -278,7 +278,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
# =====================================================================
|
|
|
|
|
|
df_min_hours['simple_will_price_drop'] = 0
|
|
|
- df_min_hours['simple_drop_in_hours'] = 0
|
|
|
+ # df_min_hours['simple_drop_in_hours'] = 0
|
|
|
df_min_hours['simple_drop_in_hours_prob'] = 0.0
|
|
|
df_min_hours['simple_drop_in_hours_dist'] = '' # 空串 表示未知
|
|
|
df_min_hours['flag_dist'] = ''
|
|
|
@@ -357,6 +357,12 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
|
|
|
df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
|
|
|
|
|
|
+ dur_num_chk = pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce')
|
|
|
+ dur_delta = dur_num_chk - float(dur_base)
|
|
|
+ df_match_chk = df_match_chk.assign(dur_delta=dur_delta)
|
|
|
+ df_match_chk = df_match_chk.loc[df_match_chk['dur_delta'].notna()].copy()
|
|
|
+ df_match_chk = df_match_chk.loc[df_match_chk['dur_delta'].abs() <= 48].copy()
|
|
|
+
|
|
|
# 距离起飞天数也对的上
|
|
|
if not df_match_chk.empty:
|
|
|
length_drop = df_match_chk.shape[0]
|
|
|
@@ -367,23 +373,26 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
df_min_hours.loc[idx, 'drop_price_change_upper'] = round(drop_price_change_upper, 2)
|
|
|
df_min_hours.loc[idx, 'drop_price_change_lower'] = round(drop_price_change_lower, 2)
|
|
|
|
|
|
- remaining_hours = (
|
|
|
- pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce') - float(dur_base)
|
|
|
- ).clip(lower=0)
|
|
|
- remaining_hours = remaining_hours.round().astype(int)
|
|
|
+ # remaining_hours = (
|
|
|
+ # pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce') - float(dur_base)
|
|
|
+ # ).clip(lower=0)
|
|
|
+ # remaining_hours = remaining_hours.round().astype(int)
|
|
|
+
|
|
|
+ # counts = remaining_hours.value_counts().sort_index()
|
|
|
+ # probs = (counts / counts.sum()).round(4)
|
|
|
|
|
|
- counts = remaining_hours.value_counts().sort_index()
|
|
|
- probs = (counts / counts.sum()).round(4)
|
|
|
+ # top_hours = int(probs.idxmax())
|
|
|
+ # top_prob = float(probs.max())
|
|
|
|
|
|
- top_hours = int(probs.idxmax())
|
|
|
- top_prob = float(probs.max())
|
|
|
+ # dist_items = list(zip(probs.index.tolist(), probs.tolist()))
|
|
|
+ # dist_items = dist_items[:10]
|
|
|
+ # dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
|
|
|
|
|
|
- dist_items = list(zip(probs.index.tolist(), probs.tolist()))
|
|
|
- dist_items = dist_items[:10]
|
|
|
- dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
|
|
|
+ dur_delta_list = df_match_chk['dur_delta'].tolist()
|
|
|
+ dist_str = "'" + ' '.join([f"{ddl:g}" for ddl in dur_delta_list])
|
|
|
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
|
- df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
|
|
|
+ # df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 1
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
|
|
|
df_min_hours.loc[idx, 'flag_dist'] = 'd0'
|
|
|
@@ -448,7 +457,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
# 可以明确的判定不降价
|
|
|
if length_drop == 0:
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
|
- df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
+ # df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
# df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'r0'
|
|
|
df_min_hours.loc[idx, 'flag_dist'] = 'r0'
|
|
|
@@ -482,7 +491,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
'price_change_amount', 'price_change_percent', 'price_duration_hours',
|
|
|
"update_hour", "create_time",
|
|
|
'valid_begin_hour', 'valid_end_hour',
|
|
|
- 'simple_will_price_drop', 'simple_drop_in_hours', 'simple_drop_in_hours_prob', 'simple_drop_in_hours_dist',
|
|
|
+ 'simple_will_price_drop', 'simple_drop_in_hours_prob', 'simple_drop_in_hours_dist',
|
|
|
'flag_dist',
|
|
|
'drop_price_change_upper', 'drop_price_change_lower', 'drop_price_sample_size',
|
|
|
'rise_price_change_upper', 'rise_price_change_lower', 'rise_price_sample_size',
|
|
|
@@ -490,7 +499,6 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
|
|
|
df_predict = df_min_hours[order_cols]
|
|
|
df_predict = df_predict.rename(columns={
|
|
|
'simple_will_price_drop': 'will_price_drop',
|
|
|
- 'simple_drop_in_hours': 'drop_in_hours',
|
|
|
'simple_drop_in_hours_prob': 'drop_in_hours_prob',
|
|
|
'simple_drop_in_hours_dist': 'drop_in_hours_dist',
|
|
|
}
|