"""Static configuration for the VietJet (VJ) flight-data pipeline.

Holds the cleaned-data table names, the MongoDB connection settings, the
city-code to country-code mapping, the hot / not-hot route lists, and a
helper that builds per-country public-holiday sets.
"""
import logging

import holidays
import pandas as pd

logger = logging.getLogger(__name__)

# Names of the cleaned flight-info tables, split by route popularity
# (hot / nothot) and by a 0_7 / 7_30 bucket.
# NOTE(review): the buckets presumably mean days before departure -- confirm.
CLEAN_VJ_HOT_NEAR_INFO_TAB = "clean_flights_vj_hot_0_7_info_tab"
CLEAN_VJ_HOT_FAR_INFO_TAB = "clean_flights_vj_hot_7_30_info_tab"
CLEAN_VJ_NOTHOT_NEAR_INFO_TAB = "clean_flights_vj_nothot_0_7_info_tab"
CLEAN_VJ_NOTHOT_FAR_INFO_TAB = "clean_flights_vj_nothot_7_30_info_tab"

# MongoDB connection settings; user and pwd are empty strings here.
mongodb_config = {
    "host": "192.168.20.218",
    "port": 27017,
    "db": "flights_datas_db",
    "user": "",
    "pwd": "",
}

# Mapping from city code to country (region) code.
city_to_country = {
    "CAN": "CN",  # Guangzhou, China
    "DPS": "ID",  # Bali, Indonesia
    "HAN": "VN",  # Hanoi, Vietnam
    "SGN": "VN",  # Ho Chi Minh City (Saigon), Vietnam
    "CTU": "CN",  # Chengdu, China
    "DAD": "VN",  # Da Nang, Vietnam
    "SEL": "KR",  # Seoul, South Korea
    "DEL": "IN",  # Delhi, India
    "UIH": "VN",  # Quy Nhon, Vietnam
    "HKG": "HK",  # Hong Kong, China
    "PQC": "VN",  # Phu Quoc, Vietnam
    "KUL": "MY",  # Kuala Lumpur, Malaysia
    "NGO": "JP",  # Nagoya, Japan
    "NHA": "VN",  # Nha Trang, Vietnam
    "PUS": "KR",  # Busan, South Korea
    "SHA": "CN",  # Shanghai, China
    "SIN": "SG",  # Singapore, Singapore
    "TPE": "TW",  # Taipei, Taiwan, China
    "TYO": "JP",  # Tokyo, Japan
    "BKK": "TH",  # Bangkok, Thailand
    "BLR": "IN",  # Bangalore, India
    "FUK": "JP",  # Fukuoka, Japan
    "BMV": "VN",  # Buon Ma Thuot, Vietnam
    "BNE": "AU",  # Brisbane, Australia
    "BOM": "IN",  # Mumbai, India
    "DLI": "VN",  # Da Lat, Vietnam
    "OSA": "JP",  # Osaka, Japan
    "RMQ": "TW",  # Taichung, Taiwan, China
    "HKT": "TH",  # Phuket, Thailand
    "HPH": "VN",  # Hai Phong, Vietnam
    "KHH": "TW",  # Kaohsiung, Taiwan, China
    "MEL": "AU",  # Melbourne, Australia
    "MNL": "PH",  # Manila, Philippines
    "SYD": "AU",  # Sydney, Australia
    "REP": "KH",  # Siem Reap, Cambodia
    "VTE": "LA",  # Vientiane, Laos
    "HYD": "IN",  # Hyderabad, India
    "AMD": "IN",  # Ahmedabad, India
}


def build_country_holidays(city_to_country,
                           start_date='2025-11-01',
                           end_date='2026-12-31'):
    """Build the set of public-holiday dates for every country in the mapping.

    Args:
        city_to_country: Mapping of city code -> country (region) code; only
            its values are used.
        start_date: First day (inclusive) of the window to keep. Anything
            accepted by ``pd.Timestamp``. Defaults to 2025-11-01.
        end_date: Last day (inclusive) of the window to keep. Anything
            accepted by ``pd.Timestamp``. Defaults to 2026-12-31.

    Returns:
        dict mapping each distinct country code to a ``set`` of
        ``datetime.date`` holidays inside the window. A country whose
        lookup fails maps to an empty set.

    Raises:
        ValueError: If ``start_date`` is later than ``end_date``.
    """
    # Convert the bounds once instead of once per holiday inside the filter.
    window_start = pd.Timestamp(start_date).date()
    window_end = pd.Timestamp(end_date).date()
    if window_start > window_end:
        raise ValueError(
            f"start_date {window_start} is after end_date {window_end}"
        )

    # Request every calendar year touched by the window
    # ([2025, 2026] for the default bounds).
    years = list(range(window_start.year, window_end.year + 1))

    country_holidays = {}
    for country in sorted(set(city_to_country.values())):
        try:
            hdays = holidays.country_holidays(country, years=years)
            # Keep a set of dates so membership tests are fast.
            country_holidays[country] = {
                d for d in hdays if window_start <= d <= window_end
            }
        except Exception as exc:
            # Best effort: the holidays library may not support every
            # country. Fall back to "no holidays", but leave a trace
            # instead of failing silently.
            logger.warning(
                "Could not load holidays for %s, using an empty set: %s",
                country, exc,
            )
            country_holidays[country] = set()
    return country_holidays


# Hot (popular) routes.
vj_flight_route_list_hot = [
    "CAN-DPS", "CAN-HAN", "CAN-SGN", "CTU-HAN", "CTU-SGN", "DAD-SEL",
    "DEL-HAN", "DPS-CAN", "HAN-CAN", "HAN-DAD", "HAN-SHA", "HAN-UIH",
    "HKG-PQC", "KUL-DAD", "NGO-HAN", "NHA-HAN", "NHA-SEL", "PQC-HKG",
    "PUS-NHA", "SEL-DAD", "SEL-SGN", "SGN-CAN", "SGN-DPS", "SGN-NGO",
    "SGN-NHA", "SGN-PQC", "SGN-SEL", "SGN-SHA", "SHA-HAN", "SHA-SGN",
    "SIN-DAD", "TPE-HAN", "TPE-PQC", "TPE-SGN", "TYO-HAN", "TYO-SGN",
]

# Not-hot (less popular) routes.
vj_flight_route_list_nothot = [
    "BKK-SGN", "BKK-TPE", "BLR-FUK", "BMV-HAN", "BNE-SGN", "CAN-PQC",
    "CTU-BNE", "CTU-DPS", "CTU-PQC", "DAD-DEL", "DAD-DLI", "DAD-HAN",
    "DLI-HAN", "DPS-BOM", "DPS-HAN", "DPS-SGN", "DPS-SHA", "FUK-DPS",
    "HAN-CTU", "HAN-DEL", "HAN-DLI", "HAN-KUL", "HAN-NGO", "HAN-NHA",
    "HAN-OSA", "HAN-PQC", "HAN-RMQ", "HAN-SGN", "HAN-TPE", "HAN-TYO",
    "HKG-SGN", "HKT-SGN", "HPH-NHA", "KHH-BNE", "KUL-HAN", "MEL-HKG",
    "MEL-HKT", "MEL-SGN", "MNL-SGN", "NGO-SYD", "OSA-REP", "OSA-SGN",
    "PQC-DAD", "PQC-SEL", "PQC-SGN", "SEL-NHA", "SEL-VTE", "SGN-CTU",
    "SGN-DAD", "SGN-HAN", "SGN-HKG", "SGN-HPH", "SGN-HYD", "SGN-MNL",
    "SGN-OSA", "SGN-SYD", "SGN-TPE", "SGN-TYO", "SHA-BKK", "SHA-BLR",
    "SHA-DPS", "SHA-PQC", "SIN-HAN", "SIN-PQC", "SIN-SGN", "SYD-SGN",
    "TYO-AMD", "UIH-SGN",
]

# All routes (hot first, then not-hot).
vj_flight_route_list = vj_flight_route_list_hot + vj_flight_route_list_nothot


if __name__ == '__main__':
    # Disabled sanity check: reports routes that appear more than once in
    # the combined list and which source list they come from.
    #
    # from collections import Counter
    # # Check for duplicates: count how often each route occurs.
    # route_counter = Counter(vj_flight_route_list)
    # # Collect the routes that occur more than once.
    # duplicates = {route: count for route, count in route_counter.items() if count > 1}
    # # Report the result.
    # if duplicates:
    #     print("Duplicate routes found:")
    #     for route, count in duplicates.items():
    #         print(f"  {route}: occurs {count} times")
    #     print(f"\n{len(duplicates)} duplicate routes found in total")
    #     # Show which list each duplicate comes from.
    #     print("\nDuplicate route distribution:")
    #     for route in duplicates:
    #         hot_count = vj_flight_route_list_hot.count(route)
    #         nothot_count = vj_flight_route_list_nothot.count(route)
    #         print(f"  {route}: {hot_count} in hot list, {nothot_count} in nothot list")
    # else:
    #     print("No duplicate routes found")

    COUNTRY_HOLIDAYS = build_country_holidays(city_to_country)
    print(COUNTRY_HOLIDAYS)