|
|
@@ -1016,15 +1016,15 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
else:
|
|
|
df_keep_nodes = pd.DataFrame()
|
|
|
|
|
|
- df_min_hours['simple_will_price_drop'] = -1 # -1 表示未知
|
|
|
+ df_min_hours['simple_will_price_drop'] = 0
|
|
|
df_min_hours['simple_drop_in_hours'] = 0
|
|
|
df_min_hours['simple_drop_in_hours_prob'] = 0.0
|
|
|
- df_min_hours['simple_drop_in_hours_dist'] = ''
|
|
|
+ df_min_hours['simple_drop_in_hours_dist'] = '' # 空串 表示未知
|
|
|
|
|
|
# 这个阈值取多少?
|
|
|
pct_threshold = 0.01
|
|
|
# pct_threshold = 2
|
|
|
- pct_threshold_1 = 0.001
|
|
|
+ pct_threshold_1 = 0.01
|
|
|
pct_threshold_c = 0.001
|
|
|
|
|
|
for idx, row in df_min_hours.iterrows():
|
|
|
@@ -1066,6 +1066,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
pct_diff = (df_drop_nodes_part['high_price_change_percent'] - float(price_change_percent)).abs()
|
|
|
df_match = df_drop_nodes_part.loc[pct_diff <= pct_threshold, ['high_price_duration_hours', 'high_price_change_percent']].copy()
|
|
|
|
|
|
+ # 历史上出现的降价幅度
|
|
|
if not df_match.empty and pd.notna(price_duration_hours):
|
|
|
remaining_hours = (df_match['high_price_duration_hours'] - float(price_duration_hours)).clip(lower=0)
|
|
|
remaining_hours = remaining_hours.round().astype(int)
|
|
|
@@ -1078,15 +1079,46 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
|
|
|
dist_items = list(zip(probs.index.tolist(), probs.tolist()))
|
|
|
dist_items = dist_items[:10]
|
|
|
- dist_str = ' | '.join([f"{int(h)}:{float(p)}" for h, p in dist_items])
|
|
|
+ dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
|
|
|
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = top_prob
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
|
|
|
|
|
|
- continue # 已经判定降价 后面不再做
|
|
|
-
|
|
|
+ continue # 已经判定降价 后面不再做
|
|
|
+
|
|
|
+ # 历史上未出现的降价幅度
|
|
|
+ else:
|
|
|
+ if pd.notna(price_duration_hours) and price_change_percent >= 0.1:
|
|
|
+ pct_vals = pd.to_numeric(
|
|
|
+ df_drop_nodes_part['high_price_change_percent'],
|
|
|
+ errors='coerce'
|
|
|
+ ).replace([np.inf, -np.inf], np.nan).dropna()
|
|
|
+ dur_vals = pd.to_numeric(
|
|
|
+ df_drop_nodes_part['high_price_duration_hours'],
|
|
|
+ errors='coerce'
|
|
|
+ ).replace([np.inf, -np.inf], np.nan).dropna()
|
|
|
+
|
|
|
+ if not pct_vals.empty and not dur_vals.empty:
|
|
|
+ pct_min = float(pct_vals.min())
|
|
|
+ pct_max = float(pct_vals.max())
|
|
|
+ dur_min = float(dur_vals.min())
|
|
|
+ dur_max = float(dur_vals.max())
|
|
|
+
|
|
|
+ if (pct_min <= float(price_change_percent) <= pct_max) and (dur_min <= float(price_duration_hours) <= dur_max):
|
|
|
+ df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.5
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = '0h->0.5'
|
|
|
+ continue # 已经判定降价 后面不再做
|
|
|
+ elif (pct_min <= float(price_change_percent)) and (dur_min <= float(price_duration_hours)):
|
|
|
+ df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.3
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = '0h->0.3'
|
|
|
+ continue # 已经判定降价 后面不再做
|
|
|
+
|
|
|
# 针对历史上发生 一直低价、一直高价、低价->高价、连续低价 等
|
|
|
if not df_keep_nodes.empty:
|
|
|
# 对准航班号, 不同起飞日期
|
|
|
@@ -1164,35 +1196,71 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
|
|
|
if not df_match_c.empty and pd.notna(price_duration_hours):
|
|
|
vals_c = df_match_c['keep_price_duration_hours'].replace([np.inf, -np.inf], np.nan).dropna()
|
|
|
if not vals_c.empty:
|
|
|
- min_val = vals_c.min()
|
|
|
- if min_val <= float(price_duration_hours):
|
|
|
+ min_val_c = vals_c.min()
|
|
|
+ if min_val_c <= float(price_duration_hours):
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.5
|
|
|
- df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = ''
|
|
|
- continue
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'c1'
|
|
|
+ continue # 已经判定降价 后面不再做
|
|
|
|
|
|
# 一般判定场景
|
|
|
pct_diff_1 = (df_keep_nodes_part['keep_price_change_percent'] - float(price_change_percent)).abs()
|
|
|
df_match_1 = df_keep_nodes_part.loc[pct_diff_1 <= pct_threshold_1, ['flight_day', 'keep_hours_until_departure', 'keep_price_duration_hours', 'keep_price_change_percent']].copy()
|
|
|
|
|
|
+ # 历史上出现过的保持低价场景
|
|
|
if not df_match_1.empty and pd.notna(price_duration_hours):
|
|
|
+ df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'k0'
|
|
|
|
|
|
df_match_1['hours_delta'] = hours_until_departure - df_match_1['keep_hours_until_departure']
|
|
|
df_match_1['modify_keep_price_duration_hours'] = df_match_1['keep_price_duration_hours'] - df_match_1['hours_delta']
|
|
|
- df_match_1 = df_match_1[df_match_1['modify_keep_price_duration_hours'] > 0]
|
|
|
+ # df_match_1 = df_match_1[df_match_1['modify_keep_price_duration_hours'] > 0]
|
|
|
|
|
|
# 比较 price_duration_hours 在 modify_keep_price_duration_hours 的百分位
|
|
|
vals = df_match_1['modify_keep_price_duration_hours'].replace([np.inf, -np.inf], np.nan).dropna()
|
|
|
if not vals.empty:
|
|
|
- q10_11 = float(vals.quantile(0.10))
|
|
|
- # q90_11 = float(vals.quantile(0.90))
|
|
|
- if q10_11 <= float(price_duration_hours):
|
|
|
+ # q10_11 = float(vals.quantile(0.10))
|
|
|
+ min_val = vals.min()
|
|
|
+ if min_val <= float(price_duration_hours):
|
|
|
df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
- df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = ''
|
|
|
-
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'k1'
|
|
|
+
|
|
|
+ # 历史上没有出现过的保持低价场景
|
|
|
+ else:
|
|
|
+ df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'n0'
|
|
|
+
|
|
|
+ if pd.notna(price_duration_hours) and price_change_percent <= 0.1:
|
|
|
+ df_keep_nodes_part_1 = df_keep_nodes_part[df_keep_nodes_part['keep_price_change_percent'] <= 0.1]
|
|
|
+ pct_vals_1 = pd.to_numeric(
|
|
|
+ df_keep_nodes_part_1['keep_price_change_percent'],
|
|
|
+ errors='coerce'
|
|
|
+ ).replace([np.inf, -np.inf], np.nan).dropna()
|
|
|
+ dur_vals_1 = pd.to_numeric(
|
|
|
+ df_keep_nodes_part_1['keep_price_duration_hours'],
|
|
|
+ errors='coerce'
|
|
|
+ ).replace([np.inf, -np.inf], np.nan).dropna()
|
|
|
+
|
|
|
+ if not pct_vals_1.empty and not dur_vals_1.empty:
|
|
|
+ pct_min_1 = float(pct_vals_1.min())
|
|
|
+ pct_max_1 = float(pct_vals_1.max())
|
|
|
+ dur_min_1 = float(dur_vals_1.min())
|
|
|
+ dur_max_1 = float(dur_vals_1.max())
|
|
|
+
|
|
|
+ if (pct_min_1 <= float(price_change_percent) <= pct_max_1) and (dur_min_1 <= float(price_duration_hours) <= dur_max_1):
|
|
|
+ df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
|
|
|
+ df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'n1'
|
|
|
+ pass
|
|
|
+
|
|
|
df_min_hours = df_min_hours.rename(columns={'seg1_dep_time': 'from_time'})
|
|
|
_pred_dt = pd.to_datetime(str(pred_time_str), format="%Y%m%d%H%M", errors="coerce")
|
|
|
df_min_hours["update_hour"] = _pred_dt
|