Ver Fonte

数据加载规范一下

node04 há 1 semana
pai
commit
54108d77c3
1 ficheiro alterado com 9 adições e 8 exclusões
  1. 9 8
      data_loader.py

+ 9 - 8
data_loader.py

@@ -81,8 +81,8 @@ def query_flight_range_status(db, table_name, from_city, to_city, dep_date_begin
                 "search_dep_time": {
                     "$gte": dep_date_begin,
                     "$lte": dep_date_end,
-                }
-                # "segments.baggage": baggage_str
+                },
+                "segments.baggage": {"$in": ["1-20", "1-30"]}  # 只查20公斤和30公斤行李的
             }
             # 动态添加航班号条件
             for i, flight_num in enumerate(flight_nums):
@@ -663,8 +663,8 @@ def load_train_data(db, flight_route_list, table_name, date_begin, date_end, out
                 if list_12:
                     df_c12 = pd.concat(list_12, ignore_index=True)
                     print(f"✅ dep_date:{dep_date}, 所有 baggage 数据合并完成,总形状: {df_c12.shape}")
-                    plot_c12_trend(df_c12, output_dir)
-                    print(f"✅ dep_date:{dep_date}, 所有 baggage 数据绘图完成")
+                    # plot_c12_trend(df_c12, output_dir)
+                    # print(f"✅ dep_date:{dep_date}, 所有 baggage 数据绘图完成")
                 else:
                     df_c12 = pd.DataFrame()
                     print(f"⚠️ dep_date:{dep_date}, 所有 baggage 数据合并为空")
@@ -690,11 +690,12 @@ def load_train_data(db, flight_route_list, table_name, date_begin, date_end, out
 
             del df1
             del df2
+            
+            # output_path = os.path.join(output_dir, f"./{route}_{timestamp_str}.csv")
+            # df_mid.to_csv(output_path, index=False, encoding="utf-8-sig", mode="a", header=not os.path.exists(output_path))
+            
+            del df_mid
             gc.collect()
-
-            output_path = os.path.join(output_dir, f"./{route}_{timestamp_str}.csv")
-            df_mid.to_csv(output_path, index=False, encoding="utf-8-sig", mode="a", header=not os.path.exists(output_path))
-
             print(f"结束处理航班号: {flight_nums}")
 
         print(f"结束处理航线: {from_city}-{to_city}")