Kaynağa Gözat

细节调整:对于历史样本过少或降价阈值过大的任务,不参与投放

node04 3 hafta önce
ebeveyn
işleme
b6fda80197
2 değiştirilmiş dosya ile 17 ekleme ve 11 silme
  1. data_preprocess.py (+2 −2)
  2. descending_cabin_task.py (+15 −9)

+ 2 - 2
data_preprocess.py

@@ -1486,12 +1486,12 @@ def predict_data_simple(df_input, group_route_str, output_dir, predict_dir=".",
         na_position='last',
     ).reset_index(drop=True)
 
-    # 时间段过滤 过滤掉异常时间(update_hour 早于 crawl_date, 以及超过12小时不更新的数据)
+    # 时间段过滤 过滤掉异常时间(update_hour 早于 crawl_date, 以及超过8小时不更新的数据)
     update_dt = pd.to_datetime(df_predict["update_hour"], errors="coerce")
     crawl_dt = pd.to_datetime(df_predict["crawl_date"], errors="coerce")
     dt_diff = update_dt - crawl_dt
     df_predict = df_predict.loc[
-        (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=12))
+        (dt_diff >= pd.Timedelta(0)) & (dt_diff <= pd.Timedelta(hours=8))
         # (dt_diff >= pd.Timedelta(0))
     ].reset_index(drop=True)
     print("更新时间过滤完成")

+ 15 - 9
descending_cabin_task.py

@@ -504,6 +504,21 @@ def _process_one_task(row, runner):
     # print(end_task)
     # print("--------------------------------")
 
+    # 在询价之前检查条件,task 存放了 keep_info 的全部字段
+    drop_price_change_upper = float(task.get("drop_price_change_upper"))   # 降价的最小幅度
+    drop_price_change_lower = float(task.get("drop_price_change_lower"))   # 降价的最大幅度 
+
+    if abs(drop_price_change_upper) > 200:  # 丢弃超过200美元的降价幅度
+        return None
+
+    max_threshold = round(drop_price_change_upper * runner.rate * 1.0)   # 降价阈值要按汇率转人民币(四舍五入到整数)
+    if abs(max_threshold) < 10:  # 丢弃小于10人民币的降价幅度
+        return None
+    
+    drop_price_sample_size = int(task.get("drop_price_sample_size", "0"))
+    if drop_price_sample_size < 2:  # 丢弃历史降价样本数过少(小于2)的
+        return None
+
     time.sleep(1)
     out = runner.run(end_task, do_verify=False)  # 不验价,仅询价
     # print(json.dumps(out, ensure_ascii=False, indent=2))
@@ -523,15 +538,6 @@ def _process_one_task(row, runner):
 
     print("raw_verify pass")
 
-    # task 存放了 keep_info 的全部字段
-    drop_price_change_upper = float(task.get("drop_price_change_upper"))   # 降价的最小幅度
-    drop_price_change_lower = float(task.get("drop_price_change_lower"))
-
-    max_threshold = round(drop_price_change_upper * runner.rate * 1.0)   # 降价阈值要按汇率转人民币(四舍五入到整数)
-
-    if abs(max_threshold) < 10:
-        return None
-
     result = results[0]
     adult_price = result.get("adult_price")
     adult_tax = result.get("adult_tax")