Ver Fonte

修正预测与验证部分细节

node04 há 1 semana
pai
commit
9e0e80464c
2 ficheiros alterados com 18 adições e 12 exclusões
  1. +14 -8
      data_preprocess.py
  2. +4 -4
      result_validate_0.py

+ 14 - 8
data_preprocess.py

@@ -1136,7 +1136,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
 
                             df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
                             df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
-                            df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = top_prob
+                            df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 1
                             df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
 
                             length_drop = df_match_chk.shape[0]
@@ -1300,19 +1300,25 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                         # 持续时间、距离起飞时间、座位变化都匹配上
                         if not df_match_chk_1.empty:
                             length_keep = df_match_chk_1.shape[0]
-                            if length_keep > length_drop:      # 不降价的多数压倒降价的少数
-                    
+                            # 可以明确的判定不降价
+                            if length_drop == 0:
                                 df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
                                 df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
                                 df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
                                 df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'k0'
-                            
-                            elif length_keep == length_drop:   # 不降价与降价相同, 取0.5概率
-
-                                df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
+                            # 依旧保持之前的降价判定,只是概率修改
+                            else:
+                                drop_prob = round(length_drop / (length_keep + length_drop), 2)
                                 df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
-                                df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.5
+                                df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = drop_prob
                                 df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'k1'
+
+                            # elif length_keep == length_drop:   # 不降价与降价相同, 取0.5概率
+
+                            #     df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
+                            #     df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
+                            #     df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.5
+                            #     df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'k1'
                         
                                 # df_match_1['hours_delta'] = hours_until_departure - df_match_1['keep_hours_until_departure']
                                 # df_match_1['modify_keep_price_duration_hours'] = df_match_1['keep_price_duration_hours'] - df_match_1['hours_delta']

+ 4 - 4
result_validate_0.py

@@ -176,8 +176,8 @@ def validate_process_auto(node, interval_hours):
         print("没有找到有效的时间戳文件")
         return
 
-    # 目标验证文件(当前整点减50小时)
-    target_time = hourly_time - datetime.timedelta(hours=50)                          
+    # 目标验证文件(当前整点减56小时: 48 + (12 - 4) = 56)
+    target_time = hourly_time - datetime.timedelta(hours=56)                          
     target_time_str = target_time.strftime("%Y%m%d%H%M")
     print(f"目标验证时间: {target_time_str}")
 
@@ -225,9 +225,9 @@ if __name__ == "__main__":
 
     # 0 手动验证
     if interval_hours == 0:
-        node, pred_time_str = "node0127", "202601281700"
+        node, pred_time_str = "node0127", "202601292300"
         validate_process(node, interval_hours, pred_time_str)
     # 1 自动验证
     else:
-        node = "node0122"
+        node = "node0127"
         validate_process_auto(node, interval_hours)