3 viikkoa sitten · 0e8e16c3e5
--- a/data_process.py
+++ b/data_process.py
@@ -29,7 +29,7 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
 
				     ).reset_index(drop=True)
			
 
				 
			
 
				     df_input = df_input[df_input['hours_until_departure'] <= 480]
			
 
				-    df_input = df_input[df_input['baggage_weight'] == 20]   # 先保留20公斤行李的
			
 
				+    df_input = df_input[df_input['baggage_weight'] == 0]   # 先保留0公斤行李的
			
 
				 
			
 
				     # 在hours_until_departure 的末尾 保留到当前时刻的数据
			
 
				     if not is_train:
			
@@ -161,8 +161,8 @@ def preprocess_data_simple(df_input, is_train=False, hourly_time=None):
 
				         
			
 
				         # 制作历史包络线
			
 
				         envelope_group = ['citypair', 'flight_numbers', 'from_date', 'baggage_weight']
			
 
				-        idx_peak = df_input.groupby(envelope_group)['price_total'].idxmax()
			
 
				-        df_envelope = df_input.loc[idx_peak, envelope_group + [
			
 
				+        idx_peak = df_target.groupby(envelope_group)['price_total'].idxmax()
			
 
				+        df_envelope = df_target.loc[idx_peak, envelope_group + [
			
 
				             'from_time', 'price_total', 'hours_until_departure', 'days_to_departure', 'update_hour', 'update_week',
			
 
				         ]].rename(columns={
			
 
				             'price_total': 'peak_price',
			
@@ -288,13 +288,16 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
				     df_min_hours['rise_price_sample_size'] = 0
			
 
				 
			
 
				     # 这个阈值取多少?
			
 
				-    # pct_threshold = 0.01
			
 
				-    # pct_threshold_1 = 0.01
			
 
				+    pct_threshold = 0.01
			
 
				+    pct_threshold_1 = 0.01
			
 
				 
			
 
				     for idx, row in df_min_hours.iterrows(): 
			
 
				         city_pair = row['citypair']
			
 
				         flight_numbers = row['flight_numbers']
			
 
				         baggage_weight = row['baggage_weight']
			
 
				+        from_date = row['from_date']
			
 
				+        if flight_numbers == "UO235" and from_date == "2026-04-25":   # 调试时用
			
 
				+            pass
			
 
				         days_to_departure = row['days_to_departure']
			
 
				         hours_until_departure = row['hours_until_departure']
			
 
				         price_change_percent = row['price_change_percent']
			
@@ -314,26 +317,26 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
				                 (df_drop_nodes['baggage_weight'] == baggage_weight)
			
 
				             ]
			
 
				             # 降价前 增量阈值、当前阈值 的匹配
			
 
				-            if not df_drop_nodes_part.empty and pd.notna(price_change_amount):   
			
 
				+            if not df_drop_nodes_part.empty and pd.notna(price_change_percent):   
			
 
				                 
			
 
				-                pca_base = float(price_change_amount)
			
 
				-                pca_vals = pd.to_numeric(df_drop_nodes_part['high_price_change_amount'], errors='coerce')
			
 
				+                pct_base = float(price_change_percent)
			
 
				+                pct_vals = pd.to_numeric(df_drop_nodes_part['high_price_change_percent'], errors='coerce')
			
 
				                 df_drop_gap = df_drop_nodes_part.loc[
			
 
				-                    pca_vals.notna(),
			
 
				+                    pct_vals.notna(),
			
 
				                     ['drop_days_to_departure', 'drop_hours_until_departure', 'drop_price_change_percent', 'drop_price_change_amount', 
			
 
				                      'high_price_duration_hours', 'high_price_change_percent', 'high_price_change_amount', 'high_price_amount', 'relative_position'
			
 
				                      ]
			
 
				                 ].copy()
			
 
				-                df_drop_gap['pca_gap'] = (pca_vals.loc[pca_vals.notna()] - pca_base)
			
 
				-                df_drop_gap['pca_abs_gap'] = df_drop_gap['pca_gap'].abs()
			
 
				+                df_drop_gap['pct_gap'] = (pct_vals.loc[pct_vals.notna()] - pct_base)
			
 
				+                df_drop_gap['pct_abs_gap'] = df_drop_gap['pct_gap'].abs()
			
 
				 
			
 
				                 price_base = pd.to_numeric(price_amount, errors='coerce')
			
 
				                 high_price_vals = pd.to_numeric(df_drop_gap['high_price_amount'], errors='coerce')
			
 
				                 df_drop_gap['price_gap'] = high_price_vals - price_base
			
 
				                 df_drop_gap['price_abs_gap'] = df_drop_gap['price_gap'].abs()
			
 
				 
			
 
				-                df_drop_gap = df_drop_gap.sort_values(['price_abs_gap', 'pca_abs_gap'], ascending=[True, True])
			
 
				-                df_match = df_drop_gap[(df_drop_gap['price_abs_gap'] <= 5.0) & (df_drop_gap['pca_abs_gap'] <= 10.0)].copy()
			
 
				+                df_drop_gap = df_drop_gap.sort_values(['price_abs_gap', 'pct_abs_gap'], ascending=[True, True])
			
 
				+                df_match = df_drop_gap[(df_drop_gap['pct_abs_gap'] <= pct_threshold) & (df_drop_gap['price_abs_gap'] <= 3.0)].copy()
			
 
				 
			
 
				                 # 历史上出现的极近似的增长(下降)幅度后的降价场景
			
 
				                 if not df_match.empty:
			
@@ -390,24 +393,24 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
				                 (df_rise_nodes['baggage_weight'] == baggage_weight)
			
 
				             ]
			
 
				             # 升价前 增量阈值、当前阈值 的匹配
			
 
				-            if not df_rise_nodes_part.empty and pd.notna(price_change_amount):
			
 
				-                pca_base_1 = float(price_change_amount)
			
 
				-                pca_vals_1 = pd.to_numeric(df_rise_nodes_part['prev_rise_change_amount'], errors='coerce')
			
 
				+            if not df_rise_nodes_part.empty and pd.notna(price_change_percent):
			
 
				+                pct_base_1 = float(price_change_percent)
			
 
				+                pct_vals_1 = pd.to_numeric(df_rise_nodes_part['prev_rise_change_percent'], errors='coerce')
			
 
				                 df_rise_gap_1 = df_rise_nodes_part.loc[
			
 
				-                    pca_vals_1.notna(),
			
 
				+                    pct_vals_1.notna(),
			
 
				                     ['rise_days_to_departure', 'rise_hours_until_departure', 'rise_price_change_percent', 'rise_price_change_amount',
			
 
				                      'prev_rise_duration_hours', 'prev_rise_change_percent', 'prev_rise_change_amount', 'prev_rise_amount', 'relative_position']
			
 
				                 ].copy()
			
 
				-                df_rise_gap_1['pca_gap'] = (pca_vals_1.loc[pca_vals_1.notna()] - pca_base_1)
			
 
				-                df_rise_gap_1['pca_abs_gap'] = df_rise_gap_1['pca_gap'].abs()
			
 
				+                df_rise_gap_1['pct_gap'] = (pct_vals_1.loc[pct_vals_1.notna()] - pct_base_1)
			
 
				+                df_rise_gap_1['pct_abs_gap'] = df_rise_gap_1['pct_gap'].abs()
			
 
				 
			
 
				                 price_base_1 = pd.to_numeric(price_amount, errors='coerce')
			
 
				                 rise_price_vals_1 = pd.to_numeric(df_rise_gap_1['prev_rise_amount'], errors='coerce')
			
 
				                 df_rise_gap_1['price_gap'] = rise_price_vals_1 - price_base_1
			
 
				                 df_rise_gap_1['price_abs_gap'] = df_rise_gap_1['price_gap'].abs()
			
 
				 
			
 
				-                df_rise_gap_1 = df_rise_gap_1.sort_values(['price_abs_gap', 'pca_abs_gap'], ascending=[True, True])
			
 
				-                df_match_1 = df_rise_gap_1.loc[(df_rise_gap_1['price_abs_gap'] <= 5.0) & (df_rise_gap_1['pca_abs_gap'] <= 10.0)].copy()
			
 
				+                df_rise_gap_1 = df_rise_gap_1.sort_values(['price_abs_gap', 'pct_abs_gap'], ascending=[True, True])
			
 
				+                df_match_1 = df_rise_gap_1.loc[(df_rise_gap_1['pct_abs_gap'] <= pct_threshold_1) & (df_rise_gap_1['price_abs_gap'] <= 3.0)].copy()
			
 
				 
			
 
				                 # 历史上出现的极近似的增长(下降)幅度后的升价场景
			
 
				                 if not df_match_1.empty:
			
@@ -443,7 +446,7 @@ def predict_data_simple(df_input, city_pair, object_dir, predict_dir=".", pred_t
 
				                             else:
			
 
				                                 drop_prob = round(length_drop / (length_rise + length_drop), 2)
			
 
				                                 # 依旧保持之前的降价判定，概率修改
			
 
				-                                if drop_prob >= 0.4:
			
 
				+                                if drop_prob >= 0.6:
			
 
				                                     df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
			
 
				                                     # df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'd1'
			
 
				                                     df_min_hours.loc[idx, 'flag_dist'] = 'd1'
			
--- a/descending_cabin_task.py
+++ b/descending_cabin_task.py
@@ -73,7 +73,11 @@ def _process_one_task(row):
 
				     drop_price_change_upper = float(task.get("drop_price_change_upper") or 0)   # 最小的降价幅度
			
 
				     max_threshold = round(drop_price_change_upper * 1.0)
			
 
				 
			
 
				-    if abs(max_threshold) < 10:
			
 
				+    if abs(max_threshold) < 10:  # 丢弃小于10人民币的降价幅度
			
 
				+        return None
			
 
				+    
			
 
				+    drop_price_sample_size = int(task.get("drop_price_sample_size", "0"))
			
 
				+    if drop_price_sample_size < 2:  # 丢弃历史降价样本数过少(小于2)的
			
 
				         return None
			
 
				 
			
 
				     end_segments = []