@@ -52,14 +52,18 @@ def __init__(self, redis_key, task_table=None):
5252 # 'table_name': ['id', 'name'...] # 缓存table_name与__update_key__的关系
5353 }
5454
55+ self ._item_pipelines = {
56+ # 'table_name': ['pipeline1', 'pipeline2'] # 缓存table_name与pipelines的关系
57+ }
58+
5559 self ._pipelines = self .load_pipelines ()
5660
5761 self ._have_mysql_pipeline = MYSQL_PIPELINE_PATH in setting .ITEM_PIPELINES
5862 self ._mysql_pipeline = None
5963
6064 if setting .ITEM_FILTER_ENABLE and not self .__class__ .dedup :
6165 if setting .ITEM_FILTER_SETTING .get (
62- "filter_type"
66+ "filter_type"
6367 ) == Dedup .BloomFilter or setting .ITEM_FILTER_SETTING .get ("name" ):
6468 self .__class__ .dedup = Dedup (
6569 to_md5 = False , ** setting .ITEM_FILTER_SETTING
@@ -217,7 +221,7 @@ def __pick_items(self, items, is_update_item=False):
217221 将每个表之间的数据分开 拆分后 原items为空
218222 @param items:
219223 @param is_update_item:
220- @return:
224+ @return: 表名与数据的字典
221225 """
222226 datas_dict = {
223227 # 'table_name': [{}, {}]
@@ -232,22 +236,24 @@ def __pick_items(self, items, is_update_item=False):
232236 if not table_name :
233237 table_name = item .table_name
234238 self ._item_tables [item_name ] = table_name
239+ self ._item_pipelines [table_name ] = item .pipelines
240+
241+ if is_update_item and table_name not in self ._item_update_keys :
242+ self ._item_update_keys [table_name ] = item .update_key
235243
236244 if table_name not in datas_dict :
237245 datas_dict [table_name ] = []
238246
239247 datas_dict [table_name ].append (item .to_dict )
240248
241- if is_update_item and table_name not in self ._item_update_keys :
242- self ._item_update_keys [table_name ] = item .update_key
243-
244249 return datas_dict
245250
246- def __export_to_db (self , table , datas , is_update = False , update_keys = ()):
247- for pipeline in self ._pipelines :
251+ def __export_to_db (self , table , datas , is_update = False , update_keys = (), used_pipelines = None ):
252+ pipelines = used_pipelines or self ._pipelines # 优先采用指定的pipelines
253+ for pipeline in pipelines :
248254 if is_update :
249255 if table == self ._task_table and not isinstance (
250- pipeline , MysqlPipeline
256+ pipeline , MysqlPipeline
251257 ):
252258 continue
253259
@@ -267,7 +273,7 @@ def __export_to_db(self, table, datas, is_update=False, update_keys=()):
267273 # 若是任务表, 且上面的pipeline里没mysql,则需调用mysql更新任务
268274 if not self ._have_mysql_pipeline and is_update and table == self ._task_table :
269275 if not self .mysql_pipeline .update_items (
270- table , datas , update_keys = update_keys
276+ table , datas , update_keys = update_keys
271277 ):
272278 log .error (
273279 f"{ self .mysql_pipeline .__class__ .__name__ } 更新数据失败. table: { table } items: { datas } "
@@ -278,7 +284,7 @@ def __export_to_db(self, table, datas, is_update=False, update_keys=()):
278284 return True
279285
280286 def __add_item_to_db (
281- self , items , update_items , requests , callbacks , items_fingerprints
287+ self , items , update_items , requests , callbacks , items_fingerprints
282288 ):
283289 export_success = True
284290 self ._is_adding_to_db = True
@@ -287,14 +293,15 @@ def __add_item_to_db(
287293 if setting .ITEM_FILTER_ENABLE :
288294 items , items_fingerprints = self .__dedup_items (items , items_fingerprints )
289295
290- # 分捡
296+ # 分捡(返回值包含 pipelines_dict)
291297 items_dict = self .__pick_items (items )
292298 update_items_dict = self .__pick_items (update_items , is_update_item = True )
293299
294300 # item批量入库
295301 failed_items = {"add" : [], "update" : [], "requests" : []}
296302 while items_dict :
297303 table , datas = items_dict .popitem ()
304+ used_pipelines = self ._item_pipelines .get (table )
298305
299306 log .debug (
300307 """
@@ -305,13 +312,14 @@ def __add_item_to_db(
305312 % (table , tools .dumps_json (datas , indent = 16 ))
306313 )
307314
308- if not self .__export_to_db (table , datas ):
315+ if not self .__export_to_db (table , datas , used_pipelines = used_pipelines ):
309316 export_success = False
310317 failed_items ["add" ].append ({"table" : table , "datas" : datas })
311318
312319 # 执行批量update
313320 while update_items_dict :
314321 table , datas = update_items_dict .popitem ()
322+ used_pipelines = self ._item_pipelines .get (table )
315323
316324 log .debug (
317325 """
@@ -324,7 +332,7 @@ def __add_item_to_db(
324332
325333 update_keys = self ._item_update_keys .get (table )
326334 if not self .__export_to_db (
327- table , datas , is_update = True , update_keys = update_keys
335+ table , datas , is_update = True , update_keys = update_keys , used_pipelines = used_pipelines
328336 ):
329337 export_success = False
330338 failed_items ["update" ].append (
0 commit comments