Boris-code
diff --git a/‎docs/csv_pipeline.md‎
Lines changed: 0 additions & 544 deletions b/‎docs/csv_pipeline.md‎
Lines changed: 0 additions & 544 deletions
diff --git a/‎docs/source_code/Item.md‎
Lines changed: 20 additions & 0 deletions b/‎docs/source_code/Item.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎docs/source_code/pipeline.md‎
Lines changed: 17 additions & 2 deletions b/‎docs/source_code/pipeline.md‎
Lines changed: 17 additions & 2 deletions
diff --git a/‎examples/csv_pipeline_example.py‎
Lines changed: 0 additions & 144 deletions b/‎examples/csv_pipeline_example.py‎
Lines changed: 0 additions & 144 deletions
diff --git a/‎feapder/buffer/item_buffer.py‎
Lines changed: 23 additions & 30 deletions b/‎feapder/buffer/item_buffer.py‎
Lines changed: 23 additions & 30 deletions
@@ -102,6 +102,26 @@ class SpiderDataItem(Item):
         self.title = self.title.strip()
 ```
 
+## 指定入库使用的pipelines
+
+```python
+
+from feapder import Item
+from feapder.pipelines.csv_pipeline import CsvPipeline
+
+
+class SpiderDataItem(Item):
+
+    __pipelines__ = [CsvPipeline()]
+
+    def __init__(self, *args, **kwargs):
+        # self.id = None
+        self.title = None
+```
+
+使用 `__pipelines__` 指定后，该item只会流经指定的pipelines处理
+
+
 ## 更新数据
 
 采集过程中，往往会有些数据漏采或解析出错，如果我们想更新已入库的数据，可将Item转为UpdateItem
 
@@ -2,11 +2,26 @@
 
 Pipeline是数据入库时流经的管道，用户可自定义，以便对接其他数据库。
 
-框架已内置mysql及mongo管道，其他管道作为扩展方式提供，可从[feapder_pipelines](https://github.com/Boris-code/feapder_pipelines)项目中按需安装
+框架已内置mysql、mongo、csv管道，其他管道作为扩展方式提供，可从[feapder_pipelines](https://github.com/Boris-code/feapder_pipelines)项目中按需安装
 
 项目地址：https://github.com/Boris-code/feapder_pipelines
 
-## 使用方式
+## 选择内置的pipeline
+
+在配置文件 `setting.py` 中的 `ITEM_PIPELINES` 中启用：
+
+```python
+ITEM_PIPELINES = [
+    "feapder.pipelines.mysql_pipeline.MysqlPipeline",
+    # "feapder.pipelines.mongo_pipeline.MongoPipeline",
+    # "feapder.pipelines.csv_pipeline.CsvPipeline",
+    # "feapder.pipelines.console_pipeline.ConsolePipeline",
+]
+```
+
+然后，爬虫中`yield`的`item`会流经所选的pipeline自动存储
+
+## 自定义pipeline
 
 注：item会被聚合成多条一起流经pipeline，方便批量入库
 
 
@@ -52,14 +52,18 @@ def __init__(self, redis_key, task_table=None):
                 # 'table_name': ['id', 'name'...] # 缓存table_name与__update_key__的关系
             }
 
+            self._item_pipelines = {
+                # 'table_name': ['pipeline1', 'pipeline2'] # 缓存table_name与pipelines的关系
+            }
+
             self._pipelines = self.load_pipelines()
 
             self._have_mysql_pipeline = MYSQL_PIPELINE_PATH in setting.ITEM_PIPELINES
             self._mysql_pipeline = None
 
             if setting.ITEM_FILTER_ENABLE and not self.__class__.dedup:
                 if setting.ITEM_FILTER_SETTING.get(
-                    "filter_type"
+                        "filter_type"
                 ) == Dedup.BloomFilter or setting.ITEM_FILTER_SETTING.get("name"):
                     self.__class__.dedup = Dedup(
                         to_md5=False, **setting.ITEM_FILTER_SETTING
@@ -217,14 +221,11 @@ def __pick_items(self, items, is_update_item=False):
         将每个表之间的数据分开 拆分后 原items为空
         @param items:
         @param is_update_item:
-        @return: (datas_dict, pipelines_dict)
+        @return: 表名与数据的字典
         """
         datas_dict = {
             # 'table_name': [{}, {}]
         }
-        pipelines_dict = {
-            # 'table_name': ['csv', 'mysql'] or None
-        }
 
         while items:
             item = items.pop(0)
@@ -235,32 +236,24 @@ def __pick_items(self, items, is_update_item=False):
             if not table_name:
                 table_name = item.table_name
                 self._item_tables[item_name] = table_name
+                self._item_pipelines[table_name] = item.pipelines
+
+            if is_update_item and table_name not in self._item_update_keys:
+                self._item_update_keys[table_name] = item.update_key
 
             if table_name not in datas_dict:
                 datas_dict[table_name] = []
-                # 保存这个 table 的 pipelines 配置（只需保存一次）
-                pipelines_dict[table_name] = getattr(item, '__pipelines__', None)
 
             datas_dict[table_name].append(item.to_dict)
 
-            if is_update_item and table_name not in self._item_update_keys:
-                self._item_update_keys[table_name] = item.update_key
-
-        return datas_dict, pipelines_dict
-
-    def __export_to_db(self, table, datas, is_update=False, update_keys=(), allowed_pipelines=None):
-        for pipeline in self._pipelines:
-            # 如果 item 指定了 pipelines，检查是否匹配（忽略大小写）
-            if allowed_pipelines is not None:
-                pipeline_name = pipeline.__class__.__name__.replace("Pipeline", "").lower()
-                # 将用户指定的 pipeline 名称也转为小写进行比较
-                allowed_pipelines_lower = [p.lower() for p in allowed_pipelines]
-                if pipeline_name not in allowed_pipelines_lower:
-                    continue  # 跳过不匹配的 pipeline
+        return datas_dict
 
+    def __export_to_db(self, table, datas, is_update=False, update_keys=(), used_pipelines=None):
+        pipelines = used_pipelines or self._pipelines  # 优先采用指定的pipelines
+        for pipeline in pipelines:
             if is_update:
                 if table == self._task_table and not isinstance(
-                    pipeline, MysqlPipeline
+                        pipeline, MysqlPipeline
                 ):
                     continue
 
@@ -280,7 +273,7 @@ def __export_to_db(self, table, datas, is_update=False, update_keys=(), allowed_
         # 若是任务表, 且上面的pipeline里没mysql，则需调用mysql更新任务
         if not self._have_mysql_pipeline and is_update and table == self._task_table:
             if not self.mysql_pipeline.update_items(
-                table, datas, update_keys=update_keys
+                    table, datas, update_keys=update_keys
             ):
                 log.error(
                     f"{self.mysql_pipeline.__class__.__name__} 更新数据失败. table: {table}  items: {datas}"
@@ -291,7 +284,7 @@ def __export_to_db(self, table, datas, is_update=False, update_keys=(), allowed_
         return True
 
     def __add_item_to_db(
-        self, items, update_items, requests, callbacks, items_fingerprints
+            self, items, update_items, requests, callbacks, items_fingerprints
     ):
         export_success = True
         self._is_adding_to_db = True
@@ -301,14 +294,14 @@ def __add_item_to_db(
             items, items_fingerprints = self.__dedup_items(items, items_fingerprints)
 
         # 分拣（按表名拆分数据，返回表名与数据的字典）
-        items_dict, items_pipelines = self.__pick_items(items)
-        update_items_dict, update_pipelines = self.__pick_items(update_items, is_update_item=True)
+        items_dict = self.__pick_items(items)
+        update_items_dict = self.__pick_items(update_items, is_update_item=True)
 
         # item批量入库
         failed_items = {"add": [], "update": [], "requests": []}
         while items_dict:
             table, datas = items_dict.popitem()
-            allowed_pipelines = items_pipelines.get(table)
+            used_pipelines = self._item_pipelines.get(table)
 
             log.debug(
                 """
@@ -319,14 +312,14 @@ def __add_item_to_db(
                 % (table, tools.dumps_json(datas, indent=16))
             )
 
-            if not self.__export_to_db(table, datas, allowed_pipelines=allowed_pipelines):
+            if not self.__export_to_db(table, datas, used_pipelines=used_pipelines):
                 export_success = False
                 failed_items["add"].append({"table": table, "datas": datas})
 
         # 执行批量update
         while update_items_dict:
             table, datas = update_items_dict.popitem()
-            allowed_pipelines = update_pipelines.get(table)
+            used_pipelines = self._item_pipelines.get(table)
 
             log.debug(
                 """
@@ -339,7 +332,7 @@ def __add_item_to_db(
 
             update_keys = self._item_update_keys.get(table)
             if not self.__export_to_db(
-                table, datas, is_update=True, update_keys=update_keys, allowed_pipelines=allowed_pipelines
+                    table, datas, is_update=True, update_keys=update_keys, used_pipelines=used_pipelines
             ):
                 export_success = False
                 failed_items["update"].append(