3 недель назад · 0e8e16c3e5
--- a/data_process.py
+++ b/data_process.py
@@ -29,7 +29,7 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
 
															     ).reset_index(drop=True)
														
 
															     df_input = df_input[df_input['hours_until_departure'] <= 480]
														
 
															-    df_input = df_input[df_input['baggage_weight'] == 20]   # 先保留20公斤行李的
														
 
															+    df_input = df_input[df_input['baggage_weight'] == 0]   # 先保留0公斤行李的
														
 
															     # 在hours_until_departure 的末尾 保留到当前时刻的数据
														
 
															     if not is_train:
														
@@ -161,8 +161,8 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
 
															         # 制作历史包络线
														
 
															         envelope_group = ['citypair', 'flight_numbers', 'from_date', 'baggage_weight']
														
 
															-        idx_peak = df_input.groupby(envelope_group)['price_total'].idxmax()
														
 
															-        df_envelope = df_input.loc[idx_peak, envelope_group + [
														
 
															+        idx_peak = df_target.groupby(envelope_group)['price_total'].idxmax()
														
 
															+        df_envelope = df_target.loc[idx_peak, envelope_group + [
														
 
															             'from_time', 'price_total', 'hours_until_departure', 'days_to_departure', 'update_hour', 'update_week',
														
 
															         ]].rename(columns={
														
 
															             'price_total': 'peak_price',
														
@@ -288,13 +288,16 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
															     df_min_hours['rise_price_sample_size'] = 0
														
 
															     # 这个阈值取多少?
														
 
															-    # pct_threshold = 0.01
														
 
															-    # pct_threshold_1 = 0.01
														
 
															+    pct_threshold = 0.01
														
 
															+    pct_threshold_1 = 0.01
														
 
															     for idx, row in df_min_hours.iterrows(): 
														
 
															         city_pair = row['citypair']
														
 
															         flight_numbers = row['flight_numbers']
														
 
															         baggage_weight = row['baggage_weight']
														
 
															+        from_date = row['from_date']
														
 
															+        if flight_numbers == "UO235" and from_date == "2026-04-25":   # 调试时用
														
 
															+            pass
														
 
															         days_to_departure = row['days_to_departure']
														
 
															         hours_until_departure = row['hours_until_departure']
														
 
															         price_change_percent = row['price_change_percent']
														
@@ -314,26 +317,26 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
															                 (df_drop_nodes['baggage_weight'] == baggage_weight)
														
 
															             ]
														
 
															             # 降价前 增量阈值、当前阈值 的匹配
														
 
															-            if not df_drop_nodes_part.empty and pd.notna(price_change_amount):   
														
 
															+            if not df_drop_nodes_part.empty and pd.notna(price_change_percent):   
														
 
															-                pca_base = float(price_change_amount)
														
 
															-                pca_vals = pd.to_numeric(df_drop_nodes_part['high_price_change_amount'], errors='coerce')
														
 
															+                pct_base = float(price_change_percent)
														
 
															+                pct_vals = pd.to_numeric(df_drop_nodes_part['high_price_change_percent'], errors='coerce')
														
 
															                 df_drop_gap = df_drop_nodes_part.loc[
														
 
															-                    pca_vals.notna(),
														
 
															+                    pct_vals.notna(),
														
 
															                     ['drop_days_to_departure', 'drop_hours_until_departure', 'drop_price_change_percent', 'drop_price_change_amount', 
														
 
															                      'high_price_duration_hours', 'high_price_change_percent', 'high_price_change_amount', 'high_price_amount', 'relative_position'
														
 
															                      ]
														
 
															                 ].copy()
														
 
															-                df_drop_gap['pca_gap'] = (pca_vals.loc[pca_vals.notna()] - pca_base)
														
 
															-                df_drop_gap['pca_abs_gap'] = df_drop_gap['pca_gap'].abs()
														
 
															+                df_drop_gap['pct_gap'] = (pct_vals.loc[pct_vals.notna()] - pct_base)
														
 
															+                df_drop_gap['pct_abs_gap'] = df_drop_gap['pct_gap'].abs()
														
 
															                 price_base = pd.to_numeric(price_amount, errors='coerce')
														
 
															                 high_price_vals = pd.to_numeric(df_drop_gap['high_price_amount'], errors='coerce')
														
 
															                 df_drop_gap['price_gap'] = high_price_vals - price_base
														
 
															                 df_drop_gap['price_abs_gap'] = df_drop_gap['price_gap'].abs()
														
 
															-                df_drop_gap = df_drop_gap.sort_values(['price_abs_gap', 'pca_abs_gap'], ascending=[True, True])
														
 
															-                df_match = df_drop_gap[(df_drop_gap['price_abs_gap'] <= 5.0) & (df_drop_gap['pca_abs_gap'] <= 10.0)].copy()
														
 
															+                df_drop_gap = df_drop_gap.sort_values(['price_abs_gap', 'pct_abs_gap'], ascending=[True, True])
														
 
															+                df_match = df_drop_gap[(df_drop_gap['pct_abs_gap'] <= pct_threshold) & (df_drop_gap['price_abs_gap'] <= 3.0)].copy()
														
 
															                 # 历史上出现的极近似的增长(下降)幅度后的降价场景
														
 
															                 if not df_match.empty:
														
@@ -390,24 +393,24 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
															                 (df_rise_nodes['baggage_weight'] == baggage_weight)
														
 
															             ]
														
 
															             # 升价前 增量阈值、当前阈值 的匹配
														
 
															-            if not df_rise_nodes_part.empty and pd.notna(price_change_amount):
														
 
															-                pca_base_1 = float(price_change_amount)
														
 
															-                pca_vals_1 = pd.to_numeric(df_rise_nodes_part['prev_rise_change_amount'], errors='coerce')
														
 
															+            if not df_rise_nodes_part.empty and pd.notna(price_change_percent):
														
 
															+                pct_base_1 = float(price_change_percent)
														
 
															+                pct_vals_1 = pd.to_numeric(df_rise_nodes_part['prev_rise_change_percent'], errors='coerce')
														
 
															                 df_rise_gap_1 = df_rise_nodes_part.loc[
														
 
															-                    pca_vals_1.notna(),
														
 
															+                    pct_vals_1.notna(),
														
 
															                     ['rise_days_to_departure', 'rise_hours_until_departure', 'rise_price_change_percent', 'rise_price_change_amount',
														
 
															                      'prev_rise_duration_hours', 'prev_rise_change_percent', 'prev_rise_change_amount', 'prev_rise_amount', 'relative_position']
														
 
															                 ].copy()
														
 
															-                df_rise_gap_1['pca_gap'] = (pca_vals_1.loc[pca_vals_1.notna()] - pca_base_1)
														
 
															-                df_rise_gap_1['pca_abs_gap'] = df_rise_gap_1['pca_gap'].abs()
														
 
															+                df_rise_gap_1['pct_gap'] = (pct_vals_1.loc[pct_vals_1.notna()] - pct_base_1)
														
 
															+                df_rise_gap_1['pct_abs_gap'] = df_rise_gap_1['pct_gap'].abs()
														
 
															                 price_base_1 = pd.to_numeric(price_amount, errors='coerce')
														
 
															                 rise_price_vals_1 = pd.to_numeric(df_rise_gap_1['prev_rise_amount'], errors='coerce')
														
 
															                 df_rise_gap_1['price_gap'] = rise_price_vals_1 - price_base_1
														
 
															                 df_rise_gap_1['price_abs_gap'] = df_rise_gap_1['price_gap'].abs()
														
 
															-                df_rise_gap_1 = df_rise_gap_1.sort_values(['price_abs_gap', 'pca_abs_gap'], ascending=[True, True])
														
 
															-                df_match_1 = df_rise_gap_1.loc[(df_rise_gap_1['price_abs_gap'] <= 5.0) & (df_rise_gap_1['pca_abs_gap'] <= 10.0)].copy()
														
 
															+                df_rise_gap_1 = df_rise_gap_1.sort_values(['price_abs_gap', 'pct_abs_gap'], ascending=[True, True])
														
 
															+                df_match_1 = df_rise_gap_1.loc[(df_rise_gap_1['pct_abs_gap'] <= pct_threshold_1) & (df_rise_gap_1['price_abs_gap'] <= 3.0)].copy()
														
 
															                 # 历史上出现的极近似的增长(下降)幅度后的升价场景
														
 
															                 if not df_match_1.empty:
														
@@ -443,7 +446,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
															                             else:
														
 
															                                 drop_prob = round(length_drop / (length_rise + length_drop), 2)
														
 
															                                 # 依旧保持之前的降价判定，概率修改
														
 
															-                                if drop_prob >= 0.4:
														
 
															+                                if drop_prob >= 0.6:
														
 
															                                     df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
														
 
															                                     # df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'd1'
														
 
															                                     df_min_hours.loc[idx, 'flag_dist'] = 'd1'
														
--- a/descending_cabin_task.py
+++ b/descending_cabin_task.py
@@ -73,7 +73,11 @@ def _process_one_task(row):
 
															     drop_price_change_upper = float(task.get("drop_price_change_upper") or 0)   # 最小的降价幅度
														
 
															     max_threshold = round(drop_price_change_upper * 1.0)
														
 
															-    if abs(max_threshold) < 10:
														
 
															+    if abs(max_threshold) < 10:  # 丢弃小于10人民币的降价幅度
														
 
															+        return None
														
 
															+    
														
 
															+    drop_price_sample_size = int(task.get("drop_price_sample_size", "0"))
														
 
															+    if drop_price_sample_size < 2:  # 丢弃历史降价样本数过少(小于2)的
														
 
															         return None
														
 
															     end_segments = []