Skip to content

Commit aa9c681

Browse files
authored
Merge branch 'master' into master
2 parents aa399d2 + d84c780 commit aa9c681

6 files changed

Lines changed: 15 additions & 13 deletions

File tree

docs/question/运行问题.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
2222
delete_keys为需要删除的key,类型: 元组/bool/string,支持正则; 常用于清空任务队列,否则重启时会断点续爬,如写成`delete_keys=True`也是可以的
2323

24-
1. 手动修改任务分数为小于当前时间搓的分数
24+
1. 手动修改任务分数为小于当前时间戳的分数
2525

2626
![-w917](http://markdown-media.oss-cn-beijing.aliyuncs.com/2021/03/11/16154327722622.jpg)
2727

docs/source_code/UpdateItem.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# UpdateItem
22

3-
UpdateItem用于更新数据,继承至Item,所以使用方式基本与Item一致,下载只说不同之处
3+
UpdateItem用于更新数据,继承至Item,所以使用方式基本与Item一致,下面只说不同之处
44

55
## 更新逻辑
66

@@ -70,4 +70,4 @@ item = item.to_UpdateItem()
7070
item.update_key = "title"
7171
```
7272

73-
**推荐方式1,直接改Item类,不用修改爬虫代码**
73+
**推荐方式1,直接改Item类,不用修改爬虫代码**

feapder/core/collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def __input_data(self):
6363

6464
current_timestamp = tools.get_current_timestamp()
6565

66-
# 取任务,只取当前时间搓以内的任务,同时将任务分数修改为 current_timestamp + setting.REQUEST_LOST_TIMEOUT
66+
# 取任务,只取当前时间戳以内的任务,同时将任务分数修改为 current_timestamp + setting.REQUEST_LOST_TIMEOUT
6767
requests_list = self._db.zrangebyscore_set_score(
6868
self._tab_requests,
6969
priority_min="-inf",

feapder/db/mysqldb.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def wapper(*args, **kwargs):
4141

4242
class MysqlDB:
4343
def __init__(
44-
self, ip=None, port=None, db=None, user_name=None, user_pass=None, set_session=None, **kwargs
44+
self, ip=None, port=None, db=None, user_name=None, user_pass=None, charset="utf8mb4", set_session=None, **kwargs
4545
):
4646
# 可能会改setting中的值,所以此处不能直接赋值为默认值,需要后加载赋值
4747
if not ip:
@@ -68,7 +68,7 @@ def __init__(
6868
user=user_name,
6969
passwd=user_pass,
7070
db=db,
71-
charset="utf8mb4",
71+
charset=charset,
7272
setsession=set_session,
7373
cursorclass=cursors.SSCursor,
7474
**kwargs
@@ -85,7 +85,7 @@ def __init__(
8585
user_pass: {}
8686
exception: {}
8787
""".format(
88-
ip, port, db, user_name, user_pass, e
88+
ip, port, db, user_name, user_pass, charset, e
8989
)
9090
)
9191
else:
@@ -119,7 +119,9 @@ def from_url(cls, url, **kwargs):
119119
"user_pass": url_parsed.password.strip(),
120120
"db": url_parsed.path.strip("/").strip(),
121121
}
122-
122+
# 解析 query 字符串参数,比如 ?charset=utf8
123+
query_params = dict(parse.parse_qsl(url_parsed.query))
124+
connect_params.update(query_params)
123125
connect_params.update(kwargs)
124126

125127
return cls(**connect_params)

feapder/network/item.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def fingerprint(self):
129129
for key, value in self.to_dict.items():
130130
if value:
131131
if (self.unique_key and key in self.unique_key) or not self.unique_key:
132-
args.append(str(value))
132+
args.append(key + str(value))
133133

134134
if args:
135135
args = sorted(args)

feapder/utils/metrics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ def emit_any(
427427
fields: influxdb的field的字段和值
428428
classify: 点的类别
429429
measurement: 存储的表
430-
timestamp: 点的时间搓,默认为当前时间
430+
timestamp: 点的时间戳,默认为当前时间
431431
432432
Returns:
433433
@@ -458,7 +458,7 @@ def emit_counter(
458458
classify: 点的类别
459459
tags: influxdb的tag的字段和值
460460
measurement: 存储的表
461-
timestamp: 点的时间搓,默认为当前时间
461+
timestamp: 点的时间戳,默认为当前时间
462462
463463
Returns:
464464
@@ -489,7 +489,7 @@ def emit_timer(
489489
classify: 点的类别
490490
tags: influxdb的tag的字段和值
491491
measurement: 存储的表
492-
timestamp: 点的时间搓,默认为当前时间
492+
timestamp: 点的时间戳,默认为当前时间
493493
494494
Returns:
495495
@@ -520,7 +520,7 @@ def emit_store(
520520
classify: 点的类别
521521
tags: influxdb的tag的字段和值
522522
measurement: 存储的表
523-
timestamp: 点的时间搓,默认为当前时间
523+
timestamp: 点的时间戳,默认为当前时间
524524
525525
Returns:
526526

0 commit comments

Comments (0)