|
@@ -151,18 +151,45 @@ def query_flight_range_status(db, table_name, city_pair, flight_numbers, from_da
|
|
|
df = df.drop(columns=['_id'])
|
|
df = df.drop(columns=['_id'])
|
|
|
|
|
|
|
|
if 'from_time' in df.columns and 'from_date' in df.columns:
|
|
if 'from_time' in df.columns and 'from_date' in df.columns:
|
|
|
|
|
+ key_cols = ['citypair', 'flight_numbers', 'from_date']
|
|
|
|
|
+ group_cols = [col for col in key_cols if col in df.columns]
|
|
|
|
|
+
|
|
|
from_time_raw = df['from_time']
|
|
from_time_raw = df['from_time']
|
|
|
from_time_str = from_time_raw.fillna('').astype(str).str.strip()
|
|
from_time_str = from_time_raw.fillna('').astype(str).str.strip()
|
|
|
- non_empty = from_time_str[from_time_str.ne('')] # 找到原始 from_time 非空的记录
|
|
|
|
|
|
|
+ extracted_time = from_time_str.str.extract(r'(\d{2}:\d{2}:\d{2})$')[0]
|
|
|
|
|
+ valid_time_mask = from_time_str.ne('') & extracted_time.notna()
|
|
|
|
|
|
|
|
- extracted_time = non_empty.str.extract(r'(\d{2}:\d{2}:\d{2})$')[0].dropna()
|
|
|
|
|
- if not extracted_time.empty:
|
|
|
|
|
- more_time = extracted_time.value_counts().idxmax() # 按众数分配给其它行 构造from_time
|
|
|
|
|
|
|
+ if valid_time_mask.any():
|
|
|
missing_mask = from_time_raw.isna() | from_time_str.eq('')
|
|
missing_mask = from_time_raw.isna() | from_time_str.eq('')
|
|
|
- if missing_mask.any():
|
|
|
|
|
- df.loc[missing_mask, 'from_time'] = df.loc[missing_mask, 'from_date'].astype(str).str.strip() + ' ' + more_time
|
|
|
|
|
|
|
+
|
|
|
|
|
+ if group_cols:
|
|
|
|
|
+ mode_by_group = (
|
|
|
|
|
+ df.loc[valid_time_mask, group_cols]
|
|
|
|
|
+ .assign(_mode_time=extracted_time.loc[valid_time_mask].values)
|
|
|
|
|
+ .groupby(group_cols, dropna=False)['_mode_time']
|
|
|
|
|
+ .agg(lambda s: s.value_counts().idxmax())
|
|
|
|
|
+ .reset_index()
|
|
|
|
|
+ )
|
|
|
|
|
+ df = df.merge(mode_by_group, on=group_cols, how='left')
|
|
|
|
|
+ fill_mask = missing_mask & df['_mode_time'].notna()
|
|
|
|
|
+ if fill_mask.any():
|
|
|
|
|
+ df.loc[fill_mask, 'from_time'] = (
|
|
|
|
|
+ df.loc[fill_mask, 'from_date'].astype(str).str.strip() + ' ' + df.loc[fill_mask, '_mode_time']
|
|
|
|
|
+ )
|
|
|
|
|
+ df = df.drop(columns=['_mode_time'])
|
|
|
|
|
+
|
|
|
|
|
+ remaining_missing_mask = df['from_time'].isna() | df['from_time'].astype(str).str.strip().eq('')
|
|
|
|
|
+ if remaining_missing_mask.any():
|
|
|
|
|
+ more_time = extracted_time.loc[valid_time_mask].value_counts().idxmax()
|
|
|
|
|
+ df.loc[remaining_missing_mask, 'from_time'] = (
|
|
|
|
|
+ df.loc[remaining_missing_mask, 'from_date'].astype(str).str.strip() + ' ' + more_time
|
|
|
|
|
+ )
|
|
|
|
|
+ pass
|
|
|
|
|
+ else:
|
|
|
|
|
+ more_time = extracted_time.loc[valid_time_mask].value_counts().idxmax()
|
|
|
|
|
+ if missing_mask.any():
|
|
|
|
|
+ df.loc[missing_mask, 'from_time'] = df.loc[missing_mask, 'from_date'].astype(str).str.strip() + ' ' + more_time
|
|
|
else:
|
|
else:
|
|
|
- # 无法得到起飞日期的抛弃
|
|
|
|
|
print(f"⚠️ 无法提取有效起飞时间,抛弃该条记录")
|
|
print(f"⚠️ 无法提取有效起飞时间,抛弃该条记录")
|
|
|
return pd.DataFrame()
|
|
return pd.DataFrame()
|
|
|
|
|
|