|
@@ -236,7 +236,7 @@ def verify_process_2(min_batch_time_str, max_batch_time_str):
|
|
|
for gid, df_gid in df_keep_all.groupby("gid", sort=False):
|
|
for gid, df_gid in df_keep_all.groupby("gid", sort=False):
|
|
|
city_pair = df_gid["citypair"].iloc[0]
|
|
city_pair = df_gid["citypair"].iloc[0]
|
|
|
flight_numbers = df_gid["flight_numbers"].iloc[0]
|
|
flight_numbers = df_gid["flight_numbers"].iloc[0]
|
|
|
- baggage_weight = df_gid["baggage_weight"].iloc[0]
|
|
|
|
|
|
|
+ baggage_weight = int(df_gid["baggage_weight"].iloc[0])
|
|
|
from_date = df_gid["from_date"].iloc[0]
|
|
from_date = df_gid["from_date"].iloc[0]
|
|
|
into_update_hour = df_gid["into_update_hour"].iloc[0]
|
|
into_update_hour = df_gid["into_update_hour"].iloc[0]
|
|
|
valid_end_hour = df_gid["valid_end_hour"].iloc[0]
|
|
valid_end_hour = df_gid["valid_end_hour"].iloc[0]
|
|
@@ -271,12 +271,59 @@ def verify_process_2(min_batch_time_str, max_batch_time_str):
|
|
|
|
|
|
|
|
df_g1 = df_gid.copy()
|
|
df_g1 = df_gid.copy()
|
|
|
df_g2 = df_query.copy()
|
|
df_g2 = df_query.copy()
|
|
|
|
|
+
|
|
|
|
|
+ df_g1["_batch_dt"] = pd.to_datetime(
|
|
|
|
|
+ df_g1.get("batch_time_str"), format="%Y%m%d%H%M", errors="coerce"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ last_price = float(df_g1.iloc[-1]["price_total"])
|
|
|
|
|
+ df_last_price = df_g1[df_g1["price_total"] == last_price]
|
|
|
|
|
+ base_row = df_last_price.iloc[0]
|
|
|
|
|
+ # base_pos = int(df_last_price.index[0])
|
|
|
|
|
+ base_dt = base_row["_batch_dt"]
|
|
|
|
|
+ base_price = float(base_row["price_total"])
|
|
|
|
|
+
|
|
|
|
|
+ drop_create_time = pd.NA
|
|
|
|
|
+ drop_price = pd.NA
|
|
|
|
|
+ price_diff = 0.0
|
|
|
|
|
+ time_diff_hours = 0.0
|
|
|
|
|
|
|
|
|
|
+ if not df_g2.empty:
|
|
|
|
|
+ df_g2["create_dt"] = pd.to_datetime(df_g2.get("create_time"), errors="coerce")
|
|
|
|
|
+ mask_drop = df_g2["price_total"] < base_price
|
|
|
|
|
+ if mask_drop.any():
|
|
|
|
|
+ drop_row = df_g2.loc[mask_drop].iloc[0]
|
|
|
|
|
+ drop_create_time = drop_row.get("create_time")
|
|
|
|
|
+ drop_price = float(drop_row["price_total"])
|
|
|
|
|
+ price_diff = round(base_price - drop_price, 2)
|
|
|
|
|
+ time_diff_hours = round(
|
|
|
|
|
+ float((drop_row["create_dt"] - base_dt) / pd.Timedelta(hours=1)),
|
|
|
|
|
+ 2,
|
|
|
|
|
+ )
|
|
|
|
|
+ flag = 1 # 发生降价标记
|
|
|
|
|
|
|
|
- pass
|
|
|
|
|
|
|
+ base_row_cp = base_row.copy()
|
|
|
|
|
+ base_row_cp["end_batch_dt"] = batch_dt
|
|
|
|
|
+ base_row_cp["drop_create_time"] = drop_create_time
|
|
|
|
|
+ base_row_cp["drop_price"] = drop_price
|
|
|
|
|
+ base_row_cp["price_diff"] = price_diff
|
|
|
|
|
+ base_row_cp["time_diff_hours"] = time_diff_hours
|
|
|
|
|
+ base_row_cp["flag"] = flag
|
|
|
|
|
+ list_base_row.append(base_row_cp)
|
|
|
|
|
+
|
|
|
|
|
+ del df_g1
|
|
|
|
|
+ del df_g2
|
|
|
|
|
+ del df_last_price
|
|
|
|
|
+ del df_query
|
|
|
|
|
+
|
|
|
|
|
+ client.close()
|
|
|
|
|
|
|
|
- pass
|
|
|
|
|
|
|
+ df_base = pd.DataFrame(list_base_row)
|
|
|
|
|
+ df_base.to_csv(output_path, header=True, index=False, encoding="utf-8-sig")
|
|
|
|
|
+ print(f"输出: {output_path}")
|
|
|
|
|
+ return
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
|
- verify_process("202604071700", "202604081400")
|
|
|
|
|
|
|
+ # verify_process("202604091700", "202604150800")
|
|
|
|
|
+ verify_process_2("202604091700", "202604150800")
|
|
|
pass
|
|
pass
|