Просмотр исходного кода

修改预测判定, 使得VJ的正向样本里近似24小时内降价

node04 2 недели назад
Родитель
Commit
2d7d39fe20
2 изменённых файла: 25 добавлено и 17 удалено
  1. 24 16
      data_preprocess.py
  2. 1 1
      main_tr_0.py

+ 24 - 16
data_preprocess.py

@@ -1173,7 +1173,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
     df_min_hours = df_min_hours[(df_min_hours['seats_remaining'] >= 5)].reset_index(drop=True)
     df_min_hours = df_min_hours[(df_min_hours['seats_remaining'] >= 5)].reset_index(drop=True)
 
 
     df_min_hours['simple_will_price_drop'] = 0   
     df_min_hours['simple_will_price_drop'] = 0   
-    df_min_hours['simple_drop_in_hours'] = 0
+    # df_min_hours['simple_drop_in_hours'] = 0
     df_min_hours['simple_drop_in_hours_prob'] = 0.0
     df_min_hours['simple_drop_in_hours_prob'] = 0.0
     df_min_hours['simple_drop_in_hours_dist'] = ''   # 空串 表示未知
     df_min_hours['simple_drop_in_hours_dist'] = ''   # 空串 表示未知
     df_min_hours['flag_dist'] = ''
     df_min_hours['flag_dist'] = ''
@@ -1272,6 +1272,12 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                         df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
                         df_match_chk = df_match_chk.loc[drop_hud_vals.notna()].copy()
                         df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
                         df_match_chk = df_match_chk.loc[(float(hud_base) - drop_hud_vals.loc[drop_hud_vals.notna()]) >= -24].copy()
 
 
+                        dur_num_chk = pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce')
+                        dur_delta = dur_num_chk - float(dur_base)
+                        df_match_chk = df_match_chk.assign(dur_delta=dur_delta)
+                        df_match_chk = df_match_chk.loc[df_match_chk['dur_delta'].notna()].copy()
+                        df_match_chk = df_match_chk.loc[df_match_chk['dur_delta'].abs() <= 48].copy()
+                        
                         # seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
                         # seats_vals = pd.to_numeric(df_match_chk['high_price_seats_remaining_change_amount'], errors='coerce')
                         # df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
                         # df_match_chk = df_match_chk.loc[seats_vals.notna()].copy()
                         # df_match_chk = df_match_chk.loc[seats_vals.loc[seats_vals.notna()] == float(seats_base)].copy()
                         # df_match_chk = df_match_chk.loc[seats_vals.loc[seats_vals.notna()] == float(seats_base)].copy()
@@ -1290,23 +1296,26 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                             # if len(drop_mode_values) > 0:
                             # if len(drop_mode_values) > 0:
                             #     df_min_hours.loc[idx, 'drop_price_change_mode'] = round(float(drop_mode_values[0]), 2)
                             #     df_min_hours.loc[idx, 'drop_price_change_mode'] = round(float(drop_mode_values[0]), 2)
 
 
-                            remaining_hours = (
-                                pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce') - float(dur_base)
-                            ).clip(lower=0)
-                            remaining_hours = remaining_hours.round().astype(int)
+                            # remaining_hours = (
+                            #     pd.to_numeric(df_match_chk['high_price_duration_hours'], errors='coerce') - float(dur_base)
+                            # ).clip(lower=0)
+                            # remaining_hours = remaining_hours.round().astype(int)
 
 
-                            counts = remaining_hours.value_counts().sort_index()
-                            probs = (counts / counts.sum()).round(4)
+                            # counts = remaining_hours.value_counts().sort_index()
+                            # probs = (counts / counts.sum()).round(4)
 
 
-                            top_hours = int(probs.idxmax())
-                            top_prob = float(probs.max())
+                            # top_hours = int(probs.idxmax())
+                            # top_prob = float(probs.max())
 
 
-                            dist_items = list(zip(probs.index.tolist(), probs.tolist()))
-                            dist_items = dist_items[:10]
-                            dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
+                            # dist_items = list(zip(probs.index.tolist(), probs.tolist()))
+                            # dist_items = dist_items[:10]
+                            # dist_str = ' '.join([f"{int(h)}h->{float(p)}" for h, p in dist_items])
+
+                            dur_delta_list = df_match_chk['dur_delta'].tolist()
+                            dist_str = "'" + ' '.join([f"{ddl:g}" for ddl in dur_delta_list])
 
 
                             df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
                             df_min_hours.loc[idx, 'simple_will_price_drop'] = 1
-                            df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
+                            # df_min_hours.loc[idx, 'simple_drop_in_hours'] = top_hours
                             df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 1
                             df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 1
                             df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
                             df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = dist_str
                             df_min_hours.loc[idx, 'flag_dist'] = 'd0'
                             df_min_hours.loc[idx, 'flag_dist'] = 'd0'
@@ -1396,7 +1405,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                             # 可以明确的判定不降价
                             # 可以明确的判定不降价
                             if length_drop == 0:
                             if length_drop == 0:
                                 df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
                                 df_min_hours.loc[idx, 'simple_will_price_drop'] = 0
-                                df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
+                                # df_min_hours.loc[idx, 'simple_drop_in_hours'] = 0
                                 df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
                                 df_min_hours.loc[idx, 'simple_drop_in_hours_prob'] = 0.0
                                 # df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'r0'
                                 # df_min_hours.loc[idx, 'simple_drop_in_hours_dist'] = 'r0'
                                 df_min_hours.loc[idx, 'flag_dist'] = 'r0'
                                 df_min_hours.loc[idx, 'flag_dist'] = 'r0'
@@ -1460,7 +1469,7 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
                   'adult_total_price', 'days_to_departure', 'hours_until_departure', 'price_change_percent', 'price_change_amount', 'price_duration_hours', 
                   'adult_total_price', 'days_to_departure', 'hours_until_departure', 'price_change_percent', 'price_change_amount', 'price_duration_hours', 
                   'update_hour', 'crawl_date',
                   'update_hour', 'crawl_date',
                   'valid_begin_hour', 'valid_end_hour',
                   'valid_begin_hour', 'valid_end_hour',
-                  'simple_will_price_drop', 'simple_drop_in_hours', 'simple_drop_in_hours_prob', 'simple_drop_in_hours_dist',
+                  'simple_will_price_drop', 'simple_drop_in_hours_prob', 'simple_drop_in_hours_dist',
                   'flag_dist',
                   'flag_dist',
                   'drop_price_change_upper', 'drop_price_change_lower', 'drop_price_sample_size',
                   'drop_price_change_upper', 'drop_price_change_lower', 'drop_price_sample_size',
                   'rise_price_change_upper', 'rise_price_change_lower', 'rise_price_sample_size',
                   'rise_price_change_upper', 'rise_price_change_lower', 'rise_price_sample_size',
@@ -1473,7 +1482,6 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
     df_predict = df_min_hours[order_cols]
     df_predict = df_min_hours[order_cols]
     df_predict = df_predict.rename(columns={
     df_predict = df_predict.rename(columns={
             'simple_will_price_drop': 'will_price_drop',
             'simple_will_price_drop': 'will_price_drop',
-            'simple_drop_in_hours': 'drop_in_hours',
             'simple_drop_in_hours_prob': 'drop_in_hours_prob',
             'simple_drop_in_hours_prob': 'drop_in_hours_prob',
             'simple_drop_in_hours_dist': 'drop_in_hours_dist',
             'simple_drop_in_hours_dist': 'drop_in_hours_dist',
         }
         }

+ 1 - 1
main_tr_0.py

@@ -50,7 +50,7 @@ def start_train():
     # date_end = datetime.today().strftime("%Y-%m-%d")
     # date_end = datetime.today().strftime("%Y-%m-%d")
     date_end = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
     date_end = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
     # date_begin = (datetime.today() - timedelta(days=32)).strftime("%Y-%m-%d")
     # date_begin = (datetime.today() - timedelta(days=32)).strftime("%Y-%m-%d")
-    date_begin = "2026-03-01"   # 2026-01-01  2026-04-17
+    date_begin = "2026-03-01"   # 2026-01-01  2026-04-23
 
 
     print(f"训练时间范围: {date_begin} 到 {date_end}")
     print(f"训练时间范围: {date_begin} 到 {date_end}")