| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- import holidays
- import pandas as pd
# Names of the cleaned VJ flight-info tables. The constant names mirror the
# string values: hot / nothot routes, and the 0_7 / 7_30 suffixes.
# NOTE(review): presumably these are MongoDB collection names — confirm against callers.
CLEAN_VJ_HOT_NEAR_INFO_TAB = "clean_flights_vj_hot_0_7_info_tab"
CLEAN_VJ_HOT_FAR_INFO_TAB = "clean_flights_vj_hot_7_30_info_tab"
CLEAN_VJ_NOTHOT_NEAR_INFO_TAB = "clean_flights_vj_nothot_0_7_info_tab"
CLEAN_VJ_NOTHOT_FAR_INFO_TAB = "clean_flights_vj_nothot_7_30_info_tab"
# MongoDB connection settings. "user" and "pwd" are empty strings here.
mongodb_config = {
    "host": "192.168.20.218",
    "port": 27017,
    "db": "flights_datas_db",
    "user": "",
    "pwd": "",
}
# Mapping from city code to country (region) code.
city_to_country = {
    "CAN": "CN",  # Guangzhou, China
    "DPS": "ID",  # Bali, Indonesia
    "HAN": "VN",  # Hanoi, Vietnam
    "SGN": "VN",  # Ho Chi Minh City (Saigon), Vietnam
    "CTU": "CN",  # Chengdu, China
    "DAD": "VN",  # Da Nang, Vietnam
    "SEL": "KR",  # Seoul, South Korea
    "DEL": "IN",  # Delhi, India
    "UIH": "VN",  # Quy Nhon, Vietnam
    "HKG": "HK",  # Hong Kong, China
    "PQC": "VN",  # Phu Quoc Island, Vietnam
    "KUL": "MY",  # Kuala Lumpur, Malaysia
    "NGO": "JP",  # Nagoya, Japan
    "NHA": "VN",  # Nha Trang, Vietnam
    "PUS": "KR",  # Busan, South Korea
    "SHA": "CN",  # Shanghai, China
    "SIN": "SG",  # Singapore, Singapore
    "TPE": "TW",  # Taipei, Taiwan, China
    "TYO": "JP",  # Tokyo, Japan
    "BKK": "TH",  # Bangkok, Thailand
    "BLR": "IN",  # Bangalore, India
    "FUK": "JP",  # Fukuoka, Japan
    "BMV": "VN",  # Buon Ma Thuot, Vietnam
    "BNE": "AU",  # Brisbane, Australia
    "BOM": "IN",  # Mumbai, India
    "DLI": "VN",  # Da Lat, Vietnam
    "OSA": "JP",  # Osaka, Japan
    "RMQ": "TW",  # Taichung, Taiwan, China
    "HKT": "TH",  # Phuket, Thailand
    "HPH": "VN",  # Hai Phong, Vietnam
    "KHH": "TW",  # Kaohsiung, Taiwan, China
    "MEL": "AU",  # Melbourne, Australia
    "MNL": "PH",  # Manila, Philippines
    "SYD": "AU",  # Sydney, Australia
    "REP": "KH",  # Siem Reap, Cambodia
    "VTE": "LA",  # Vientiane, Laos
    "HYD": "IN",  # Hyderabad, India
    "AMD": "IN",  # Ahmedabad, India
}
# Build the public-holiday calendar of every country (region) in the mapping.
def build_country_holidays(city_to_country, start_date='2025-11-01', end_date='2026-12-31'):
    """Return the holiday dates of each country that fall inside a date window.

    Args:
        city_to_country: Mapping of city code to country code. Only the
            values (country codes) are used.
        start_date: First day of the window, inclusive. Anything accepted by
            ``pd.Timestamp``.
        end_date: Last day of the window, inclusive. Anything accepted by
            ``pd.Timestamp``.

    Returns:
        A dict mapping each distinct country code to a ``set`` of holiday
        dates within ``[start_date, end_date]``. A country that the
        ``holidays`` package does not support maps to an empty set.
    """
    # Convert the bounds once instead of once per holiday inside the filter.
    first_day = pd.Timestamp(start_date).date()
    last_day = pd.Timestamp(end_date).date()
    # Request exactly the calendar years the window touches.
    years = list(range(first_day.year, last_day.year + 1))

    country_holidays = {}
    for country in sorted(set(city_to_country.values())):
        try:
            hdays = holidays.country_holidays(country, years=years)
        except NotImplementedError:
            # Raised by the holidays package for a country it does not
            # support. Any other error (e.g. a broken or outdated install)
            # is deliberately left to propagate instead of silently
            # producing empty calendars for every country.
            country_holidays[country] = set()
            continue
        # Stored as a set of dates for fast membership checks.
        country_holidays[country] = {
            day for day in hdays if first_day <= day <= last_day
        }
    return country_holidays
# Hot (popular) routes, written as "ORIGIN-DESTINATION" city-code pairs.
vj_flight_route_list_hot = [
    "CAN-DPS", "CAN-HAN", "CAN-SGN", "CTU-HAN",
    "CTU-SGN", "DAD-SEL", "DEL-HAN", "DPS-CAN",
    "HAN-CAN", "HAN-DAD", "HAN-SHA", "HAN-UIH",
    "HKG-PQC", "KUL-DAD", "NGO-HAN", "NHA-HAN",
    "NHA-SEL", "PQC-HKG", "PUS-NHA", "SEL-DAD",
    "SEL-SGN", "SGN-CAN", "SGN-DPS", "SGN-NGO",
    "SGN-NHA", "SGN-PQC", "SGN-SEL", "SGN-SHA",
    "SHA-HAN", "SHA-SGN", "SIN-DAD", "TPE-HAN",
    "TPE-PQC", "TPE-SGN", "TYO-HAN", "TYO-SGN",
]
# Not-hot (less popular) routes, written as "ORIGIN-DESTINATION" city-code pairs.
vj_flight_route_list_nothot = [
    "BKK-SGN", "BKK-TPE", "BLR-FUK", "BMV-HAN",
    "BNE-SGN", "CAN-PQC", "CTU-BNE", "CTU-DPS",
    "CTU-PQC", "DAD-DEL", "DAD-DLI", "DAD-HAN",
    "DLI-HAN", "DPS-BOM", "DPS-HAN", "DPS-SGN",
    "DPS-SHA", "FUK-DPS", "HAN-CTU", "HAN-DEL",
    "HAN-DLI", "HAN-KUL", "HAN-NGO", "HAN-NHA",
    "HAN-OSA", "HAN-PQC", "HAN-RMQ", "HAN-SGN",
    "HAN-TPE", "HAN-TYO", "HKG-SGN", "HKT-SGN",
    "HPH-NHA", "KHH-BNE", "KUL-HAN", "MEL-HKG",
    "MEL-HKT", "MEL-SGN", "MNL-SGN", "NGO-SYD",
    "OSA-REP", "OSA-SGN", "PQC-DAD", "PQC-SEL",
    "PQC-SGN", "SEL-NHA", "SEL-VTE", "SGN-CTU",
    "SGN-DAD", "SGN-HAN", "SGN-HKG", "SGN-HPH",
    "SGN-HYD", "SGN-MNL", "SGN-OSA", "SGN-SYD",
    "SGN-TPE", "SGN-TYO", "SHA-BKK", "SHA-BLR",
    "SHA-DPS", "SHA-PQC", "SIN-HAN", "SIN-PQC",
    "SIN-SGN", "SYD-SGN", "TYO-AMD", "UIH-SGN",
]
# All routes: the hot routes followed by the not-hot routes.
vj_flight_route_list = [*vj_flight_route_list_hot, *vj_flight_route_list_nothot]
if __name__ == "__main__":
    # Disabled helper: check vj_flight_route_list for duplicated routes.
    # Uncomment to run it manually.
    #
    # from collections import Counter
    #
    # # Count how many times each route appears.
    # route_counter = Counter(vj_flight_route_list)
    # # Keep only the routes that appear more than once.
    # duplicates = {route: count for route, count in route_counter.items() if count > 1}
    # # Report the result.
    # if duplicates:
    #     print("Duplicated routes found:")
    #     for route, count in duplicates.items():
    #         print(f"  {route}: appears {count} times")
    #     print(f"\n{len(duplicates)} duplicated routes found in total")
    #     # Show which list each duplicated route comes from.
    #     print("\nDistribution of duplicated routes:")
    #     for route in duplicates:
    #         hot_count = vj_flight_route_list_hot.count(route)
    #         nothot_count = vj_flight_route_list_nothot.count(route)
    #         print(f"  {route}: {hot_count} in hot list, {nothot_count} in nothot list")
    # else:
    #     print("No duplicated routes found")

    # Build the holiday calendars for every mapped country and print them.
    COUNTRY_HOLIDAYS = build_country_holidays(city_to_country)
    print(COUNTRY_HOLIDAYS)
|