Parcourir la source

uo在匹配样本时加上舱位的对齐

node04 il y a 5 jours
Parent
commit
3d26d3cc7d
1 fichier modifié avec 7 ajouts et 4 suppressions
  1. 7 4
      data_process.py

+ 7 - 4
data_process.py

@@ -391,6 +391,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
         price_change_amount = row['price_change_amount']
         price_duration_hours = row['price_duration_hours']
         price_amount = row['price_total']
+        cabins = row['cabins']
 
         length_drop = 0
         length_rise = 0
@@ -411,7 +412,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
                 df_drop_gap = df_drop_nodes_part.loc[
                     pct_vals.notna(),
                     ['drop_days_to_departure', 'drop_hours_until_departure', 'drop_price_change_percent', 'drop_price_change_amount', 
-                     'high_price_duration_hours', 'high_price_change_percent', 'high_price_change_amount', 'high_price_amount', 'relative_position'
+                     'high_price_duration_hours', 'high_price_change_percent', 'high_price_change_amount', 'high_price_amount', 'high_price_cabins', 'relative_position'
                      ]
                 ].copy()
                 df_drop_gap['pct_gap'] = (pct_vals.loc[pct_vals.notna()] - pct_base)
@@ -429,8 +430,9 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
                 )
                 df_match = df_drop_gap[
                     (df_drop_gap['pct_abs_gap'] <= pct_threshold) 
-                    & (df_drop_gap['price_abs_gap'] <= 3.0)
+                    & (df_drop_gap['price_abs_gap'] <= 5.0)
                     & same_sign_mask
+                    & (df_drop_gap['high_price_cabins'] == cabins)
                 ].copy()
 
                 # 历史上出现的极近似的增长(下降)幅度后的降价场景
@@ -508,7 +510,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
                 df_rise_gap_1 = df_rise_nodes_part.loc[
                     pct_vals_1.notna(),
                     ['rise_days_to_departure', 'rise_hours_until_departure', 'rise_price_change_percent', 'rise_price_change_amount',
-                     'prev_rise_duration_hours', 'prev_rise_change_percent', 'prev_rise_change_amount', 'prev_rise_amount', 'relative_position']
+                     'prev_rise_duration_hours', 'prev_rise_change_percent', 'prev_rise_change_amount', 'prev_rise_amount', 'prev_rise_cabins', 'relative_position']
                 ].copy()
                 df_rise_gap_1['pct_gap'] = (pct_vals_1.loc[pct_vals_1.notna()] - pct_base_1)
                 df_rise_gap_1['pct_abs_gap'] = df_rise_gap_1['pct_gap'].abs()
@@ -525,8 +527,9 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
                 )
                 df_match_1 = df_rise_gap_1.loc[
                     (df_rise_gap_1['pct_abs_gap'] <= pct_threshold_1) 
-                    & (df_rise_gap_1['price_abs_gap'] <= 3.0)
+                    & (df_rise_gap_1['price_abs_gap'] <= 5.0)
                     & same_sign_mask_1
+                    & (df_rise_gap_1['prev_rise_cabins'] == cabins)
                 ].copy()
 
                 # 历史上出现的极近似的增长(下降)幅度后的升价场景