From 4eef604856ea4f14a3f9a3be3be39401c9a0a819 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Fri, 8 May 2020 23:27:51 +0800
Subject: [PATCH 01/12] new feature: weibo poller

---
 config_sample.py               |   1 +
 hoshino/modules/weibo/weibo.py | 442 +++++++++++++++++++++++++++++++++
 2 files changed, 443 insertions(+)
 create mode 100644 hoshino/modules/weibo/weibo.py

diff --git a/config_sample.py b/config_sample.py
index dee3e59e3..193863362 100644
--- a/config_sample.py
+++ b/config_sample.py
@@ -43,4 +43,5 @@
     # 'setu',
     'translate',
     # 'twitter',
+    # 'weibo'
 }
diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
new file mode 100644
index 000000000..d064a9b92
--- /dev/null
+++ b/hoshino/modules/weibo/weibo.py
@@ -0,0 +1,442 @@
+# -*- coding: UTF-8 -*-
+
+import json
+import os
+import random
+import sys
+import traceback
+from collections import OrderedDict
+from datetime import date, datetime, timedelta
+from time import sleep
+
+import requests
+from lxml import etree
+from hoshino.service import Service, Privilege as Priv
+from hoshino import util, logger
+
+sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
+
+class Weibo(object):
+    def __init__(self, config):
+        """Weibo类初始化"""
+        self.validate_config(config)
+        self.filter = config['filter']  
+        self.user = self.get_user_info(config["user_id"])
+        self.got_count = 0  # 存储爬取到的微博数
+        self.weibo = []  # 存储爬取到的所有微博信息
+        self.weibo_id_list = []  # 存储爬取到的所有微博id
+
+    def get_json(self, params):
+        """获取网页中json数据"""
+        url = 'https://m.weibo.cn/api/container/getIndex?'
+        r = requests.get(url, params=params)
+        return r.json()
+
+    def get_user_info(self, user_id):
+        """获取用户信息"""
+        params = {'containerid': '100505' + str(user_id)}
+        js = self.get_json(params)
+        if js['ok']:
+            info = js['data']['userInfo']
+            user_info = OrderedDict()
+            user_info['id'] = user_id
+            user_info['screen_name'] = info.get('screen_name', '')
+            user_info['gender'] = info.get('gender', '')
+            params = {
+                'containerid':
+                '230283' + str(user_id) + '_-_INFO'
+            }
+            zh_list = [
+                u'生日', u'所在地', u'小学', u'初中', u'高中', u'大学', u'公司', u'注册时间',
+                u'阳光信用'
+            ]
+            en_list = [
+                'birthday', 'location', 'education', 'education', 'education',
+                'education', 'company', 'registration_time', 'sunshine'
+            ]
+            for i in en_list:
+                user_info[i] = ''
+            js = self.get_json(params)
+            if js['ok']:
+                cards = js['data']['cards']
+                if isinstance(cards, list) and len(cards) > 1:
+                    card_list = cards[0]['card_group'] + cards[1]['card_group']
+                    for card in card_list:
+                        if card.get('item_name') in zh_list:
+                            user_info[en_list[zh_list.index(
+                                card.get('item_name'))]] = card.get(
+                                    'item_content', '')
+            user_info['statuses_count'] = info.get('statuses_count', 0)
+            user_info['followers_count'] = info.get('followers_count', 0)
+            user_info['follow_count'] = info.get('follow_count', 0)
+            user_info['description'] = info.get('description', '')
+            user_info['profile_url'] = info.get('profile_url', '')
+            user_info['profile_image_url'] = info.get('profile_image_url', '')
+            user_info['avatar_hd'] = info.get('avatar_hd', '')
+            user_info['urank'] = info.get('urank', 0)
+            user_info['mbrank'] = info.get('mbrank', 0)
+            user_info['verified'] = info.get('verified', False)
+            user_info['verified_type'] = info.get('verified_type', 0)
+            user_info['verified_reason'] = info.get('verified_reason', '')
+            user = self.standardize_info(user_info)
+            return user
+
+    def validate_config(self, config):
+        """验证配置是否正确"""
+
+        # 验证filter、original_pic_download、retweet_pic_download、original_video_download、retweet_video_download
+        argument_list = [
+            'filter'
+        ]
+        for argument in argument_list:
+            if config[argument] != 0 and config[argument] != 1:
+                logger.error(u'%s值应为0或1,请重新输入' % config[argument])
+
+        # 验证user_id_list
+        if "user_id" not in config:
+            logger.error(u'请填写用户 id')
+        if "service_name" not in config:
+            logger.error(u'请填写所属服务名')
+
+    def get_pics(self, weibo_info):
+        """获取微博原始图片url"""
+        if weibo_info.get('pics'):
+            pic_info = weibo_info['pics']
+            pic_list = [pic['large']['url'] for pic in pic_info]
+            pics = ','.join(pic_list)
+        else:
+            pics = ''
+        return pics
+
+    def get_live_photo(self, weibo_info):
+        """获取live photo中的视频url"""
+        live_photo_list = []
+        live_photo = weibo_info.get('pic_video')
+        if live_photo:
+            prefix = 'https://video.weibo.com/media/play?livephoto=//us.sinaimg.cn/'
+            for i in live_photo.split(','):
+                if len(i.split(':')) == 2:
+                    url = prefix + i.split(':')[1] + '.mov'
+                    live_photo_list.append(url)
+            return live_photo_list
+
+    def get_video_url(self, weibo_info):
+        """获取微博视频url"""
+        video_url = ''
+        video_url_list = []
+        if weibo_info.get('page_info'):
+            if weibo_info['page_info'].get('media_info') and weibo_info[
+                    'page_info'].get('type') == 'video':
+                media_info = weibo_info['page_info']['media_info']
+                video_url = media_info.get('mp4_720p_mp4')
+                if not video_url:
+                    video_url = media_info.get('mp4_hd_url')
+                    if not video_url:
+                        video_url = media_info.get('mp4_sd_url')
+                        if not video_url:
+                            video_url = media_info.get('stream_url_hd')
+                            if not video_url:
+                                video_url = media_info.get('stream_url')
+        if video_url:
+            video_url_list.append(video_url)
+        live_photo_list = self.get_live_photo(weibo_info)
+        if live_photo_list:
+            video_url_list += live_photo_list
+        return ';'.join(video_url_list)
+
+    def get_location(self, selector):
+        """获取微博发布位置"""
+        location_icon = 'timeline_card_small_location_default.png'
+        span_list = selector.xpath('//span')
+        location = ''
+        for i, span in enumerate(span_list):
+            if span.xpath('img/@src'):
+                if location_icon in span.xpath('img/@src')[0]:
+                    location = span_list[i + 1].xpath('string(.)')
+                    break
+        return location
+
+    def get_article_url(self, selector):
+        """获取微博中头条文章的url"""
+        article_url = ''
+        text = selector.xpath('string(.)')
+        if text.startswith(u'发布了头条文章'):
+            url = selector.xpath('//a/@data-url')
+            if url and url[0].startswith('http://t.cn'):
+                article_url = url[0]
+        return article_url
+
+    def get_topics(self, selector):
+        """获取参与的微博话题"""
+        span_list = selector.xpath("//span[@class='surl-text']")
+        topics = ''
+        topic_list = []
+        for span in span_list:
+            text = span.xpath('string(.)')
+            if len(text) > 2 and text[0] == '#' and text[-1] == '#':
+                topic_list.append(text[1:-1])
+        if topic_list:
+            topics = ','.join(topic_list)
+        return topics
+
+    def get_at_users(self, selector):
+        """获取@用户"""
+        a_list = selector.xpath('//a')
+        at_users = ''
+        at_list = []
+        for a in a_list:
+            if '@' + a.xpath('@href')[0][3:] == a.xpath('string(.)'):
+                at_list.append(a.xpath('string(.)')[1:])
+        if at_list:
+            at_users = ','.join(at_list)
+        return at_users
+
+    def string_to_int(self, string):
+        """字符串转换为整数"""
+        if isinstance(string, int):
+            return string
+        elif string.endswith(u'万+'):
+            string = int(string[:-2] + '0000')
+        elif string.endswith(u'万'):
+            string = int(string[:-1] + '0000')
+        return int(string)
+
+    def standardize_date(self, created_at):
+        """标准化微博发布时间"""
+        if u"刚刚" in created_at:
+            created_at = datetime.now().strftime("%Y-%m-%d")
+        elif u"分钟" in created_at:
+            minute = created_at[:created_at.find(u"分钟")]
+            minute = timedelta(minutes=int(minute))
+            created_at = (datetime.now() - minute).strftime("%Y-%m-%d")
+        elif u"小时" in created_at:
+            hour = created_at[:created_at.find(u"小时")]
+            hour = timedelta(hours=int(hour))
+            created_at = (datetime.now() - hour).strftime("%Y-%m-%d")
+        elif u"昨天" in created_at:
+            day = timedelta(days=1)
+            created_at = (datetime.now() - day).strftime("%Y-%m-%d")
+        elif created_at.count('-') == 1:
+            year = datetime.now().strftime("%Y")
+            created_at = year + "-" + created_at
+        return created_at
+
+    def standardize_info(self, weibo):
+        """标准化信息，去除乱码"""
+        for k, v in weibo.items():
+            if 'bool' not in str(type(v)) and 'int' not in str(
+                    type(v)) and 'list' not in str(
+                        type(v)) and 'long' not in str(type(v)):
+                weibo[k] = v.replace(u"\u200b", "").encode(
+                    sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)
+        return weibo
+
+    def parse_weibo(self, weibo_info):
+        weibo = OrderedDict()
+        if weibo_info['user']:
+            weibo['user_id'] = weibo_info['user']['id']
+            weibo['screen_name'] = weibo_info['user']['screen_name']
+        else:
+            weibo['user_id'] = ''
+            weibo['screen_name'] = ''
+        weibo['id'] = int(weibo_info['id'])
+        weibo['bid'] = weibo_info['bid']
+        text_body = weibo_info['text']
+        selector = etree.HTML(text_body)
+        weibo['text'] = etree.HTML(text_body).xpath('string(.)')
+        weibo['article_url'] = self.get_article_url(selector)
+        weibo['pics'] = self.get_pics(weibo_info)
+        weibo['video_url'] = self.get_video_url(weibo_info)
+        weibo['location'] = self.get_location(selector)
+        weibo['created_at'] = weibo_info['created_at']
+        weibo['source'] = weibo_info['source']
+        weibo['attitudes_count'] = self.string_to_int(
+            weibo_info.get('attitudes_count', 0))
+        weibo['comments_count'] = self.string_to_int(
+            weibo_info.get('comments_count', 0))
+        weibo['reposts_count'] = self.string_to_int(
+            weibo_info.get('reposts_count', 0))
+        weibo['topics'] = self.get_topics(selector)
+        weibo['at_users'] = self.get_at_users(selector)
+        return self.standardize_info(weibo)
+
+    def print_one_weibo(self, weibo):
+        """打印一条微博"""
+        try:
+            logger.info(u'微博id：%d' % weibo['id'])
+            logger.info(u'微博正文：%s' % weibo['text'])
+            logger.info(u'原始图片url：%s' % weibo['pics'])
+            logger.info(u'微博位置：%s' % weibo['location'])
+            logger.info(u'发布时间：%s' % weibo['created_at'])
+            logger.info(u'发布工具：%s' % weibo['source'])
+            logger.info(u'点赞数：%d' % weibo['attitudes_count'])
+            logger.info(u'评论数：%d' % weibo['comments_count'])
+            logger.info(u'转发数：%d' % weibo['reposts_count'])
+            logger.info(u'话题：%s' % weibo['topics'])
+            logger.info(u'@用户：%s' % weibo['at_users'])
+            logger.info(u'url：https://m.weibo.cn/detail/%d' % weibo['id'])
+        except OSError:
+            pass
+    
+    def print_weibo(self, weibo):
+        """打印微博，若为转发微博，会同时打印原创和转发部分"""
+        if weibo.get('retweet'):
+            logger.info('*' * 100)
+            logger.info(u'转发部分：')
+            self.print_one_weibo(weibo['retweet'])
+            logger.info('*' * 100)
+            logger.info(u'原创部分：')
+        self.print_one_weibo(weibo)
+        logger.info('-' * 120)
+
+    def get_username(self):
+        return self.user["screen_name"]
+    
+    def get_user_id(self):
+        return self.user["id"]
+
+    def get_weibo_json(self, page):
+        """获取网页中微博json数据"""
+        params = {
+            'containerid': '107603' + self.get_user_id(),
+            'page': page
+        }
+        js = self.get_json(params)
+        return js
+
+    def get_long_weibo(self, id):
+        """获取长微博"""
+        for i in range(5):
+            url = 'https://m.weibo.cn/detail/%s' % id
+            html = requests.get(url).text
+            html = html[html.find('"status":'):]
+            html = html[:html.rfind('"hotScheme"')]
+            html = html[:html.rfind(',')]
+            html = '{' + html + '}'
+            js = json.loads(html, strict=False)
+            weibo_info = js.get('status')
+            if weibo_info:
+                weibo = self.parse_weibo(weibo_info)
+                return weibo
+            sleep(random.randint(6, 10))
+
+    def print_user_info(self):
+        """打印用户信息"""
+        logger.info('+' * 100)
+        logger.info(u'用户信息')
+        logger.info(u'用户id：%s' % self.user['id'])
+        logger.info(u'用户昵称：%s' % self.user['screen_name'])
+        gender = u'女' if self.user['gender'] == 'f' else u'男'
+        logger.info(u'性别：%s' % gender)
+        logger.info(u'生日：%s' % self.user['birthday'])
+        logger.info(u'所在地：%s' % self.user['location'])
+        logger.info(u'教育经历：%s' % self.user['education'])
+        logger.info(u'公司：%s' % self.user['company'])
+        logger.info(u'阳光信用：%s' % self.user['sunshine'])
+        logger.info(u'注册时间：%s' % self.user['registration_time'])
+        logger.info(u'微博数：%d' % self.user['statuses_count'])
+        logger.info(u'粉丝数：%d' % self.user['followers_count'])
+        logger.info(u'关注数：%d' % self.user['follow_count'])
+        logger.info(u'url：https://m.weibo.cn/profile/%s' % self.user['id'])
+        if self.user.get('verified_reason'):
+            logger.info(self.user['verified_reason'])
+        logger.info(self.user['description'])
+        logger.info('+' * 100)
+
+    def get_one_weibo(self, info):
+        """获取一条微博的全部信息"""
+        try:
+            weibo_info = info['mblog']
+            weibo_id = weibo_info['id']
+            retweeted_status = weibo_info.get('retweeted_status')
+            is_long = weibo_info.get('isLongText')
+            if retweeted_status and retweeted_status.get('id'):  # 转发
+                retweet_id = retweeted_status.get('id')
+                is_long_retweet = retweeted_status.get('isLongText')
+                if is_long:
+                    weibo = self.get_long_weibo(weibo_id)
+                    if not weibo:
+                        weibo = self.parse_weibo(weibo_info)
+                else:
+                    weibo = self.parse_weibo(weibo_info)
+                if is_long_retweet:
+                    retweet = self.get_long_weibo(retweet_id)
+                    if not retweet:
+                        retweet = self.parse_weibo(retweeted_status)
+                else:
+                    retweet = self.parse_weibo(retweeted_status)
+                retweet['created_at'] = self.standardize_date(
+                    retweeted_status['created_at'])
+                weibo['retweet'] = retweet
+            else:  # 原创
+                if is_long:
+                    weibo = self.get_long_weibo(weibo_id)
+                    if not weibo:
+                        weibo = self.parse_weibo(weibo_info)
+                else:
+                    weibo = self.parse_weibo(weibo_info)
+            weibo['created_at'] = self.standardize_date(
+                weibo_info['created_at'])
+            return weibo
+        except Exception as e:
+            logger.exception(e)
+
+    def get_latest_weibos(self):
+        try:
+            latest_weibos = []
+            js = self.get_weibo_json(1)
+            if js['ok']:
+                weibos = js['data']['cards']
+                for w in weibos:
+                    if w['card_type'] == 9:
+                        wb = self.get_one_weibo(w)
+                        if wb:
+                            if wb['created_at'] != str(date.today()):
+                                continue
+                            if wb['id'] in self.weibo_id_list:
+                                continue
+                            if (not self.filter) or (
+                                    'retweet' not in wb.keys()):
+                                self.weibo.append(wb)
+                                latest_weibos.append(wb)
+                                self.weibo_id_list.append(wb['id'])
+                                self.got_count += 1
+                                self.print_weibo(wb)
+                            
+            return latest_weibos
+        except Exception as e:
+            logger.exception(e)
+            return []
+
+
+user_configs = util.load_config(__file__)
+subr_dic = {}
+
+for config in user_configs:
+    print(config)
+    wb = Weibo(config)
+    service_name = config["service_name"]
+    subService = Service(service_name, enable_on_default=True)
+
+    if service_name not in subr_dic:
+        subr_dic[service_name] = {"service": subService, "spiders": [wb]}
+    else:
+        subr_dic[service_name]["spiders"].append(wb)
+
+@sv.scheduled_job('interval', seconds=60 * 20)
+async def weibo_poller():
+    for sv_name, serviceObj in subr_dic.items():
+        weibos = []
+        ssv = serviceObj["service"]
+        spiders = serviceObj["spiders"]
+        for spider in spiders:
+            latest_weibos = spider.get_latest_weibos()
+            formatted_weibos = [wb["text"] for wb in latest_weibos]
+
+            if l := len(formatted_weibos):
+                sv.logger.info(f"成功获取@{spider.get_username()}的新微博{l}条")
+            else:
+                sv.logger.info(f"未检测到@{spider.get_username()}的新微博")
+
+            weibos.extend(formatted_weibos)
+        await ssv.broadcast(weibos, ssv.name, 0.5)
\ No newline at end of file

From 7c6cc1b94458ca54f324e65858096ddb3228b3f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Sat, 9 May 2020 01:52:37 +0800
Subject: [PATCH 02/12] Send weibo images; fix subservice bug

---
 hoshino/modules/weibo/weibo.py | 46 ++++++++++++++++++++++------------
 hoshino/res.py                 | 22 ++++++++++++++++
 2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index d064a9b92..eb4d19c16 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -13,6 +13,7 @@
 from lxml import etree
 from hoshino.service import Service, Privilege as Priv
 from hoshino import util, logger
+from hoshino.res import R
 
 sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
 
@@ -22,9 +23,8 @@ def __init__(self, config):
         self.validate_config(config)
         self.filter = config['filter']  
         self.user = self.get_user_info(config["user_id"])
-        self.got_count = 0  # 存储爬取到的微博数
-        self.weibo = []  # 存储爬取到的所有微博信息
-        self.weibo_id_list = []  # 存储爬取到的所有微博id
+
+        self.__recent = False
 
     def get_json(self, params):
         """获取网页中json数据"""
@@ -103,10 +103,9 @@ def get_pics(self, weibo_info):
         if weibo_info.get('pics'):
             pic_info = weibo_info['pics']
             pic_list = [pic['large']['url'] for pic in pic_info]
-            pics = ','.join(pic_list)
         else:
-            pics = ''
-        return pics
+            pic_list = []
+        return pic_list
 
     def get_live_photo(self, weibo_info):
         """获取live photo中的视频url"""
@@ -142,7 +141,7 @@ def get_video_url(self, weibo_info):
         live_photo_list = self.get_live_photo(weibo_info)
         if live_photo_list:
             video_url_list += live_photo_list
-        return ';'.join(video_url_list)
+        return video_url_list
 
     def get_location(self, selector):
         """获取微博发布位置"""
@@ -205,20 +204,25 @@ def standardize_date(self, created_at):
         """标准化微博发布时间"""
         if u"刚刚" in created_at:
             created_at = datetime.now().strftime("%Y-%m-%d")
+            self.__recent = True
         elif u"分钟" in created_at:
             minute = created_at[:created_at.find(u"分钟")]
             minute = timedelta(minutes=int(minute))
             created_at = (datetime.now() - minute).strftime("%Y-%m-%d")
+            self.__recent = True
         elif u"小时" in created_at:
             hour = created_at[:created_at.find(u"小时")]
             hour = timedelta(hours=int(hour))
             created_at = (datetime.now() - hour).strftime("%Y-%m-%d")
+            self.__recent = False
         elif u"昨天" in created_at:
             day = timedelta(days=1)
             created_at = (datetime.now() - day).strftime("%Y-%m-%d")
+            self.__recent = False
         elif created_at.count('-') == 1:
             year = datetime.now().strftime("%Y")
             created_at = year + "-" + created_at
+            self.__recent = False
         return created_at
 
     def standardize_info(self, weibo):
@@ -380,6 +384,7 @@ def get_one_weibo(self, info):
             return weibo
         except Exception as e:
             logger.exception(e)
+            self.__recent = False
 
     def get_latest_weibos(self):
         try:
@@ -391,16 +396,11 @@ def get_latest_weibos(self):
                     if w['card_type'] == 9:
                         wb = self.get_one_weibo(w)
                         if wb:
-                            if wb['created_at'] != str(date.today()):
-                                continue
-                            if wb['id'] in self.weibo_id_list:
+                            if not self.__recent:
                                 continue
                             if (not self.filter) or (
                                     'retweet' not in wb.keys()):
-                                self.weibo.append(wb)
                                 latest_weibos.append(wb)
-                                self.weibo_id_list.append(wb['id'])
-                                self.got_count += 1
                                 self.print_weibo(wb)
                             
             return latest_weibos
@@ -416,14 +416,28 @@ def get_latest_weibos(self):
     print(config)
     wb = Weibo(config)
     service_name = config["service_name"]
-    subService = Service(service_name, enable_on_default=True)
 
     if service_name not in subr_dic:
+        subService = Service(service_name, enable_on_default=True)
         subr_dic[service_name] = {"service": subService, "spiders": [wb]}
     else:
         subr_dic[service_name]["spiders"].append(wb)
 
-@sv.scheduled_job('interval', seconds=60 * 20)
+def wb_to_message(wb):
+    msg = f'@{wb["screen_name"]}:\n{wb["text"]}'
+    if sv.bot.config.IS_CQPRO and len(wb["pics"]) > 0:
+        images_url = wb["pics"]
+        msg = f'{msg}\n'
+        res_imgs = [R.remote_img(url).cqcode for url in images_url]
+        for img in res_imgs:
+            msg = f'{msg}{img}'
+    if len(wb["video_url"]) > 0:
+        videos = wb["video_url"]
+        res_videos = ';'.join(videos)
+        msg = f'{msg}\n视频链接：{res_videos}'
+    return msg
+
+@sv.scheduled_job('interval', seconds=20*60)
 async def weibo_poller():
     for sv_name, serviceObj in subr_dic.items():
         weibos = []
@@ -431,7 +445,7 @@ async def weibo_poller():
         spiders = serviceObj["spiders"]
         for spider in spiders:
             latest_weibos = spider.get_latest_weibos()
-            formatted_weibos = [wb["text"] for wb in latest_weibos]
+            formatted_weibos = [wb_to_message(wb) for wb in latest_weibos]
 
             if l := len(formatted_weibos):
                 sv.logger.info(f"成功获取@{spider.get_username()}的新微博{l}条")
diff --git a/hoshino/res.py b/hoshino/res.py
index 436e16959..a071bc219 100644
--- a/hoshino/res.py
+++ b/hoshino/res.py
@@ -1,5 +1,7 @@
 import os
 from PIL import Image
+import requests
+from io import BytesIO
 from urllib.request import pathname2url
 from urllib.parse import urljoin
 
@@ -19,7 +21,27 @@ def get(path, *paths):
     def img(path, *paths):
         return ResImg(os.path.join('img', path, *paths))
 
+    @staticmethod
+    def remote_img(url):
+        return RemoteResImg(url)
+
+class RemoteResObj:
+    def __init__(self, url):
+        self.__path = url
+    
+    @property
+    def url(self):
+        return self.__path
 
+class RemoteResImg(RemoteResObj):
+    @property
+    def cqcode(self) -> MessageSegment:
+        return MessageSegment.image(self.url)
+    
+    def open(self) -> Image:
+        response = requests.get(self.url)
+        return Image.open(BytesIO(response))
+    
 
 class ResObj:
 

From 2c0abae9c677237dd15b7149a678a3dabc5a1567 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Sat, 9 May 2020 02:36:43 +0800
Subject: [PATCH 03/12] Replace requests with httpx; small fixes from code review

---
 hoshino/modules/weibo/weibo.py | 39 ++++++++++++++++++++--------------
 hoshino/res.py                 |  3 ++-
 requirements.txt               |  3 ++-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index eb4d19c16..79d48f0ac 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -9,13 +9,23 @@
 from datetime import date, datetime, timedelta
 from time import sleep
 
-import requests
+import httpx as requests
 from lxml import etree
 from hoshino.service import Service, Privilege as Priv
 from hoshino import util, logger
 from hoshino.res import R
 
 sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
+user_configs = util.load_config(__file__)
+'''
+sample config.json
+
+[{
+    "user_id": "6603867494",
+    "service_name": "pcr-weibo",
+    "filter": true
+}]
+'''
 
 class Weibo(object):
     def __init__(self, config):
@@ -83,20 +93,18 @@ def get_user_info(self, user_id):
 
     def validate_config(self, config):
         """验证配置是否正确"""
+        exist_argument_list = ['user_id', 'service_name']
+        true_false_argument_list = ['filter']
 
-        # 验证filter、original_pic_download、retweet_pic_download、original_video_download、retweet_video_download
-        argument_list = [
-            'filter'
-        ]
-        for argument in argument_list:
-            if config[argument] != 0 and config[argument] != 1:
-                logger.error(u'%s值应为0或1,请重新输入' % config[argument])
+        for argument in true_false_argument_list:
+            if argument not in config:
+                logger.error(f'请填写 {argument}')
+            if config[argument] != True and config[argument] != False:
+                logger.error(f'{argument} 值应为 True 或 False,请重新输入')
 
-        # 验证user_id_list
-        if "user_id" not in config:
-            logger.error(u'请填写用户 id')
-        if "service_name" not in config:
-            logger.error(u'请填写所属服务名')
+        for argument in exist_argument_list: 
+            if argument not in config:
+                logger.error(f'请填写 {argument}')
 
     def get_pics(self, weibo_info):
         """获取微博原始图片url"""
@@ -409,11 +417,10 @@ def get_latest_weibos(self):
             return []
 
 
-user_configs = util.load_config(__file__)
 subr_dic = {}
 
 for config in user_configs:
-    print(config)
+    sv.logger.debug(config)
     wb = Weibo(config)
     service_name = config["service_name"]
 
@@ -437,7 +444,7 @@ def wb_to_message(wb):
         msg = f'{msg}\n视频链接：{res_videos}'
     return msg
 
-@sv.scheduled_job('interval', seconds=20*60)
+@sv.scheduled_job('interval', seconds=10)
 async def weibo_poller():
     for sv_name, serviceObj in subr_dic.items():
         weibos = []
diff --git a/hoshino/res.py b/hoshino/res.py
index a071bc219..6803c6025 100644
--- a/hoshino/res.py
+++ b/hoshino/res.py
@@ -1,6 +1,7 @@
 import os
+import asyncio
 from PIL import Image
-import requests
+import httpx as requests
 from io import BytesIO
 from urllib.request import pathname2url
 from urllib.parse import urljoin
diff --git a/requirements.txt b/requirements.txt
index bccc5b948..e0907f59f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ zhconv>=1.4.0
 Pillow>=6.2.1
 TwitterAPI>=2.5.10
 matplotlib>=3.2.0
-numpy>=1.18.0
\ No newline at end of file
+numpy>=1.18.0
+httpx>=0.12.1
\ No newline at end of file

From a7a1c350132740b8e766f93d640372788c74d371 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Sat, 9 May 2020 03:46:02 +0800
Subject: [PATCH 04/12] Replace all synchronous requests with async/await

---
 hoshino/modules/weibo/__init__.py  |  61 ++++++++++++
 hoshino/modules/weibo/exception.py |  21 +++++
 hoshino/modules/weibo/weibo.py     | 143 +++++++++--------------------
 hoshino/res.py                     |  10 +-
 4 files changed, 133 insertions(+), 102 deletions(-)
 create mode 100644 hoshino/modules/weibo/__init__.py
 create mode 100644 hoshino/modules/weibo/exception.py

diff --git a/hoshino/modules/weibo/__init__.py b/hoshino/modules/weibo/__init__.py
new file mode 100644
index 000000000..84a130984
--- /dev/null
+++ b/hoshino/modules/weibo/__init__.py
@@ -0,0 +1,61 @@
+from .weibo import WeiboSpider
+from hoshino.service import Service, Privilege as Priv
+from hoshino.res import R
+from hoshino import util
+
+sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
+user_configs = util.load_config(__file__)
+'''
+sample config.json
+
+[{
+    "user_id": "6603867494",
+    "service_name": "bcr-weibo",
+    "filter": true
+}]
+'''
+
+subr_dic = {}
+
+for config in user_configs:
+    sv.logger.debug(config)
+    wb_spider = WeiboSpider(config)
+    service_name = config["service_name"]
+
+    if service_name not in subr_dic:
+        subService = Service(service_name, enable_on_default=True)
+        subr_dic[service_name] = {"service": subService, "spiders": [wb_spider]}
+    else:
+        subr_dic[service_name]["spiders"].append(wb_spider)
+
+def wb_to_message(wb):
+    msg = f'@{wb["screen_name"]}:\n{wb["text"]}'
+    if sv.bot.config.IS_CQPRO and len(wb["pics"]) > 0:
+        images_url = wb["pics"]
+        msg = f'{msg}\n'
+        res_imgs = [R.remote_img(url).cqcode for url in images_url]
+        for img in res_imgs:
+            msg = f'{msg}{img}'
+    if len(wb["video_url"]) > 0:
+        videos = wb["video_url"]
+        res_videos = ';'.join(videos)
+        msg = f'{msg}\n视频链接：{res_videos}'
+    return msg
+
+@sv.scheduled_job('interval', seconds=20*60)
+async def weibo_poller():
+    for sv_name, serviceObj in subr_dic.items():
+        weibos = []
+        ssv = serviceObj["service"]
+        spiders = serviceObj["spiders"]
+        for spider in spiders:
+            latest_weibos = await spider.get_latest_weibos()
+            formatted_weibos = [wb_to_message(wb) for wb in latest_weibos]
+
+            if l := len(formatted_weibos):
+                sv.logger.info(f"成功获取@{spider.get_username()}的新微博{l}条")
+            else:
+                sv.logger.info(f"未检测到@{spider.get_username()}的新微博")
+
+            weibos.extend(formatted_weibos)
+        await ssv.broadcast(weibos, ssv.name, 0.5)
\ No newline at end of file
diff --git a/hoshino/modules/weibo/exception.py b/hoshino/modules/weibo/exception.py
new file mode 100644
index 000000000..5d1fab491
--- /dev/null
+++ b/hoshino/modules/weibo/exception.py
@@ -0,0 +1,21 @@
+class WeiboError(Exception):  # base error of the weibo module; holds one or more message lines
+    def __init__(self, msg, *msgs):
+        super().__init__(msg, *msgs)  # fill .args so repr()/pickling keep the text
+        self._msgs = [msg, *msgs]
+
+    def __str__(self):
+        return '\n'.join(map(str, self._msgs))  # map(str) tolerates non-str messages
+
+    message = property(lambda self: str(self), doc="All message lines joined by newlines.")
+
+    def append(self, msg:str):
+        self._msgs.append(msg)
+        self.args = tuple(self._msgs)  # keep .args in sync with the appended line
+
+
+class ParseError(WeiboError):  # raised by validate_config when an option has an invalid value
+    pass
+
+
+class NotFoundError(WeiboError):
+    pass
\ No newline at end of file
diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 79d48f0ac..384ddc67e 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -1,51 +1,42 @@
 # -*- coding: UTF-8 -*-
 
 import json
-import os
 import random
 import sys
-import traceback
 from collections import OrderedDict
 from datetime import date, datetime, timedelta
 from time import sleep
 
-import httpx as requests
+import httpx
 from lxml import etree
-from hoshino.service import Service, Privilege as Priv
-from hoshino import util, logger
-from hoshino.res import R
+from hoshino import logger
+from .exception import *
 
-sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
-user_configs = util.load_config(__file__)
-'''
-sample config.json
-
-[{
-    "user_id": "6603867494",
-    "service_name": "pcr-weibo",
-    "filter": true
-}]
-'''
-
-class Weibo(object):
+class WeiboSpider(object):
     def __init__(self, config):
         """Weibo类初始化"""
         self.validate_config(config)
-        self.filter = config['filter']  
-        self.user = self.get_user_info(config["user_id"])
-
+        self.filter = config['filter'] 
+        self.user_id = config['user_id']
+        self.user = self.get_user_info(self.user_id)
         self.__recent = False
 
-    def get_json(self, params):
+    async def get_json(self, params):
         """获取网页中json数据"""
         url = 'https://m.weibo.cn/api/container/getIndex?'
-        r = requests.get(url, params=params)
+        async with httpx.AsyncClient() as client:
+            r = await client.get(url, params=params)
+            return r.json()
+
+    def sync_get_json(self, params):
+        url = 'https://m.weibo.cn/api/container/getIndex?'
+        r = httpx.get(url, params=params)
         return r.json()
 
     def get_user_info(self, user_id):
         """获取用户信息"""
         params = {'containerid': '100505' + str(user_id)}
-        js = self.get_json(params)
+        js = self.sync_get_json(params)
         if js['ok']:
             info = js['data']['userInfo']
             user_info = OrderedDict()
@@ -66,7 +57,7 @@ def get_user_info(self, user_id):
             ]
             for i in en_list:
                 user_info[i] = ''
-            js = self.get_json(params)
+            js = self.sync_get_json(params)
             if js['ok']:
                 cards = js['data']['cards']
                 if isinstance(cards, list) and len(cards) > 1:
@@ -98,13 +89,13 @@ def validate_config(self, config):
 
         for argument in true_false_argument_list:
             if argument not in config:
-                logger.error(f'请填写 {argument}')
+                raise NotFoundError(f'未找到参数{argument}')
             if config[argument] != True and config[argument] != False:
-                logger.error(f'{argument} 值应为 True 或 False,请重新输入')
+                raise ParseError(f'{argument} 值应为 True 或 False')
 
         for argument in exist_argument_list: 
             if argument not in config:
-                logger.error(f'请填写 {argument}')
+                raise NotFoundError(f'未找到参数{argument}')
 
     def get_pics(self, weibo_info):
         """获取微博原始图片url"""
@@ -305,32 +296,34 @@ def get_username(self):
         return self.user["screen_name"]
     
     def get_user_id(self):
-        return self.user["id"]
+        return self.user_id
 
-    def get_weibo_json(self, page):
+    async def get_weibo_json(self, page):
         """获取网页中微博json数据"""
         params = {
             'containerid': '107603' + self.get_user_id(),
             'page': page
         }
-        js = self.get_json(params)
+        js = await self.get_json(params)
         return js
 
-    def get_long_weibo(self, id):
+    async def get_long_weibo(self, id):
         """获取长微博"""
         for i in range(5):
             url = 'https://m.weibo.cn/detail/%s' % id
-            html = requests.get(url).text
-            html = html[html.find('"status":'):]
-            html = html[:html.rfind('"hotScheme"')]
-            html = html[:html.rfind(',')]
-            html = '{' + html + '}'
-            js = json.loads(html, strict=False)
-            weibo_info = js.get('status')
-            if weibo_info:
-                weibo = self.parse_weibo(weibo_info)
-                return weibo
-            sleep(random.randint(6, 10))
+            async with httpx.AsyncClient() as client:
+                html = await client.get(url)
+                html = html.text
+                html = html[html.find('"status":'):]
+                html = html[:html.rfind('"hotScheme"')]
+                html = html[:html.rfind(',')]
+                html = '{' + html + '}'
+                js = json.loads(html, strict=False)
+                weibo_info = js.get('status')
+                if weibo_info:
+                    weibo = self.parse_weibo(weibo_info)
+                    return weibo
+                sleep(random.randint(6, 10))
 
     def print_user_info(self):
         """打印用户信息"""
@@ -355,7 +348,7 @@ def print_user_info(self):
         logger.info(self.user['description'])
         logger.info('+' * 100)
 
-    def get_one_weibo(self, info):
+    async def get_one_weibo(self, info):
         """获取一条微博的全部信息"""
         try:
             weibo_info = info['mblog']
@@ -366,13 +359,13 @@ def get_one_weibo(self, info):
                 retweet_id = retweeted_status.get('id')
                 is_long_retweet = retweeted_status.get('isLongText')
                 if is_long:
-                    weibo = self.get_long_weibo(weibo_id)
+                    weibo = await self.get_long_weibo(weibo_id)
                     if not weibo:
                         weibo = self.parse_weibo(weibo_info)
                 else:
                     weibo = self.parse_weibo(weibo_info)
                 if is_long_retweet:
-                    retweet = self.get_long_weibo(retweet_id)
+                    retweet = await self.get_long_weibo(retweet_id)
                     if not retweet:
                         retweet = self.parse_weibo(retweeted_status)
                 else:
@@ -382,7 +375,7 @@ def get_one_weibo(self, info):
                 weibo['retweet'] = retweet
             else:  # 原创
                 if is_long:
-                    weibo = self.get_long_weibo(weibo_id)
+                    weibo = await self.get_long_weibo(weibo_id)
                     if not weibo:
                         weibo = self.parse_weibo(weibo_info)
                 else:
@@ -394,15 +387,15 @@ def get_one_weibo(self, info):
             logger.exception(e)
             self.__recent = False
 
-    def get_latest_weibos(self):
+    async def get_latest_weibos(self):
         try:
             latest_weibos = []
-            js = self.get_weibo_json(1)
+            js = await self.get_weibo_json(1)
             if js['ok']:
                 weibos = js['data']['cards']
                 for w in weibos:
                     if w['card_type'] == 9:
-                        wb = self.get_one_weibo(w)
+                        wb = await self.get_one_weibo(w)
                         if wb:
                             if not self.__recent:
                                 continue
@@ -414,50 +407,4 @@ def get_latest_weibos(self):
             return latest_weibos
         except Exception as e:
             logger.exception(e)
-            return []
-
-
-subr_dic = {}
-
-for config in user_configs:
-    sv.logger.debug(config)
-    wb = Weibo(config)
-    service_name = config["service_name"]
-
-    if service_name not in subr_dic:
-        subService = Service(service_name, enable_on_default=True)
-        subr_dic[service_name] = {"service": subService, "spiders": [wb]}
-    else:
-        subr_dic[service_name]["spiders"].append(wb)
-
-def wb_to_message(wb):
-    msg = f'@{wb["screen_name"]}:\n{wb["text"]}'
-    if sv.bot.config.IS_CQPRO and len(wb["pics"]) > 0:
-        images_url = wb["pics"]
-        msg = f'{msg}\n'
-        res_imgs = [R.remote_img(url).cqcode for url in images_url]
-        for img in res_imgs:
-            msg = f'{msg}{img}'
-    if len(wb["video_url"]) > 0:
-        videos = wb["video_url"]
-        res_videos = ';'.join(videos)
-        msg = f'{msg}\n视频链接：{res_videos}'
-    return msg
-
-@sv.scheduled_job('interval', seconds=10)
-async def weibo_poller():
-    for sv_name, serviceObj in subr_dic.items():
-        weibos = []
-        ssv = serviceObj["service"]
-        spiders = serviceObj["spiders"]
-        for spider in spiders:
-            latest_weibos = spider.get_latest_weibos()
-            formatted_weibos = [wb_to_message(wb) for wb in latest_weibos]
-
-            if l := len(formatted_weibos):
-                sv.logger.info(f"成功获取@{spider.get_username()}的新微博{l}条")
-            else:
-                sv.logger.info(f"未检测到@{spider.get_username()}的新微博")
-
-            weibos.extend(formatted_weibos)
-        await ssv.broadcast(weibos, ssv.name, 0.5)
\ No newline at end of file
+            return []
\ No newline at end of file
diff --git a/hoshino/res.py b/hoshino/res.py
index 6803c6025..d50fcd185 100644
--- a/hoshino/res.py
+++ b/hoshino/res.py
@@ -1,7 +1,7 @@
 import os
 import asyncio
 from PIL import Image
-import httpx as requests
+import httpx
 from io import BytesIO
 from urllib.request import pathname2url
 from urllib.parse import urljoin
@@ -39,9 +39,11 @@ class RemoteResImg(RemoteResObj):
     def cqcode(self) -> MessageSegment:
         return MessageSegment.image(self.url)
     
-    def open(self) -> Image:
-        response = requests.get(self.url)
-        return Image.open(BytesIO(response))
+    async def open(self) -> Image:
+        async with httpx.AsyncClient() as client:
+            r = await client.get(self.url)
+            response = requests.get(self.url)
+            return Image.open(BytesIO(response))
     
 
 class ResObj:

From 89e740c87e02be78c6b1ee10226115b6d4cbd70d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Sat, 9 May 2020 13:25:52 +0800
Subject: [PATCH 05/12] Fix logic of latest weibo; modify the format of
 config.json

---
 hoshino/modules/weibo/__init__.py | 47 +++++++++++++++++++++----------
 hoshino/modules/weibo/weibo.py    |  9 +++++-
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/hoshino/modules/weibo/__init__.py b/hoshino/modules/weibo/__init__.py
index 84a130984..a76a925fc 100644
--- a/hoshino/modules/weibo/__init__.py
+++ b/hoshino/modules/weibo/__init__.py
@@ -3,30 +3,39 @@
 from hoshino.res import R
 from hoshino import util
 
-sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
-user_configs = util.load_config(__file__)
 '''
 sample config.json
 
 [{
-    "user_id": "6603867494",
     "service_name": "bcr-weibo",
-    "filter": true
+    "enable_on_default": true,
+    "users":[{
+        "user_id": "6603867494",
+        "filter": true
+    }]
+    
 }]
 '''
+def _load_config(services_config):
+    for sv_config in services_config:
+        sv.logger.debug(sv_config)
+        service_name = sv_config["service_name"]
+        enable_on_default = sv_config.get("enable_on_default", False)
+        users_config = sv_config["users"]
 
-subr_dic = {}
+        sv_spider_list = []
+        for user_config in users_config:
+            wb_spider = WeiboSpider(user_config)
+            sv_spider_list.append(wb_spider)
+        
+        subService = Service(service_name, enable_on_default=enable_on_default)
+        subr_dic[service_name] = {"service": subService, "spiders": sv_spider_list}
 
-for config in user_configs:
-    sv.logger.debug(config)
-    wb_spider = WeiboSpider(config)
-    service_name = config["service_name"]
 
-    if service_name not in subr_dic:
-        subService = Service(service_name, enable_on_default=True)
-        subr_dic[service_name] = {"service": subService, "spiders": [wb_spider]}
-    else:
-        subr_dic[service_name]["spiders"].append(wb_spider)
+sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
+services_config = util.load_config(__file__)
+subr_dic = {}
+_load_config(services_config)
 
 def wb_to_message(wb):
     msg = f'@{wb["screen_name"]}:\n{wb["text"]}'
@@ -58,4 +67,12 @@ async def weibo_poller():
                 sv.logger.info(f"未检测到@{spider.get_username()}的新微博")
 
             weibos.extend(formatted_weibos)
-        await ssv.broadcast(weibos, ssv.name, 0.5)
\ No newline at end of file
+        await ssv.broadcast(weibos, ssv.name, 0.5)
+
+@sv.scheduled_job('interval', seconds=60*60*24)
+async def clear_spider_buffer():
+    """Once a day, empty every spider's list of already-pushed weibo ids."""
+    sv.logger.info("Clearing weibo spider buffer...")
+    for entry in subr_dic.values():
+        for spider in entry["spiders"]:
+            spider.clear_buffer()
\ No newline at end of file
diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 384ddc67e..6b22b0a5a 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -19,7 +19,11 @@ def __init__(self, config):
         self.filter = config['filter'] 
         self.user_id = config['user_id']
         self.user = self.get_user_info(self.user_id)
+        self.received_weibo_ids = []
         self.__recent = False
+    
+    def clear_buffer(self):
+        self.received_weibo_ids.clear()
 
     async def get_json(self, params):
         """获取网页中json数据"""
@@ -84,7 +88,7 @@ def get_user_info(self, user_id):
 
     def validate_config(self, config):
         """验证配置是否正确"""
-        exist_argument_list = ['user_id', 'service_name']
+        exist_argument_list = ['user_id']
         true_false_argument_list = ['filter']
 
         for argument in true_false_argument_list:
@@ -399,9 +403,12 @@ async def get_latest_weibos(self):
                         if wb:
                             if not self.__recent:
                                 continue
+                            if wb["id"] in self.received_weibo_ids:
+                                continue
                             if (not self.filter) or (
                                     'retweet' not in wb.keys()):
                                 latest_weibos.append(wb)
+                                self.received_weibo_ids.append(wb["id"])
                                 self.print_weibo(wb)
                             
             return latest_weibos

From f27e3ccd7a29258f05690f55dfb8afd2648de7c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Sat, 9 May 2020 13:29:01 +0800
Subject: [PATCH 06/12] Small fix for async

---
 hoshino/modules/weibo/weibo.py | 3 ++-
 hoshino/res.py                 | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 6b22b0a5a..9105f3b2a 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -3,6 +3,7 @@
 import json
 import random
 import sys
+import asyncio
 from collections import OrderedDict
 from datetime import date, datetime, timedelta
 from time import sleep
@@ -327,7 +328,7 @@ async def get_long_weibo(self, id):
                 if weibo_info:
                     weibo = self.parse_weibo(weibo_info)
                     return weibo
-                sleep(random.randint(6, 10))
+                asyncio.sleep(random.randint(6, 10))
 
     def print_user_info(self):
         """打印用户信息"""
diff --git a/hoshino/res.py b/hoshino/res.py
index d50fcd185..6aace99c5 100644
--- a/hoshino/res.py
+++ b/hoshino/res.py
@@ -42,8 +42,7 @@ def cqcode(self) -> MessageSegment:
     async def open(self) -> Image:
         async with httpx.AsyncClient() as client:
             r = await client.get(self.url)
-            response = requests.get(self.url)
-            return Image.open(BytesIO(response))
+            return Image.open(BytesIO(r.content))  # BytesIO needs the body bytes, not the Response
     
 
 class ResObj:

From b2d8595aec3d4c8000ae69110ff53450d28338e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Sat, 9 May 2020 19:35:16 +0800
Subject: [PATCH 07/12] Call async func in __init__ with asyncio

---
 hoshino/modules/weibo/weibo.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 9105f3b2a..2dbf9cb70 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -19,29 +19,24 @@ def __init__(self, config):
         self.validate_config(config)
         self.filter = config['filter'] 
         self.user_id = config['user_id']
-        self.user = self.get_user_info(self.user_id)
         self.received_weibo_ids = []
         self.__recent = False
+        asyncio.get_event_loop().run_until_complete(self._async_init())
+    
+    async def _async_init(self):
+        self.user = await self.get_user_info(self.user_id)
     
-    def clear_buffer(self):
-        self.received_weibo_ids.clear()
-
     async def get_json(self, params):
         """获取网页中json数据"""
         url = 'https://m.weibo.cn/api/container/getIndex?'
         async with httpx.AsyncClient() as client:
-            r = await client.get(url, params=params)
+            r = await client.get(url, params=params, timeout=10.0) # sometimes timeout
             return r.json()
 
-    def sync_get_json(self, params):
-        url = 'https://m.weibo.cn/api/container/getIndex?'
-        r = httpx.get(url, params=params)
-        return r.json()
-
-    def get_user_info(self, user_id):
+    async def get_user_info(self, user_id):
         """获取用户信息"""
         params = {'containerid': '100505' + str(user_id)}
-        js = self.sync_get_json(params)
+        js = await self.get_json(params)
         if js['ok']:
             info = js['data']['userInfo']
             user_info = OrderedDict()
@@ -62,7 +57,7 @@ def get_user_info(self, user_id):
             ]
             for i in en_list:
                 user_info[i] = ''
-            js = self.sync_get_json(params)
+            js =  await self.get_json(params)
             if js['ok']:
                 cards = js['data']['cards']
                 if isinstance(cards, list) and len(cards) > 1:
@@ -87,6 +82,9 @@ def get_user_info(self, user_id):
             user = self.standardize_info(user_info)
             return user
 
+    def clear_buffer(self):
+        self.received_weibo_ids.clear()
+
     def validate_config(self, config):
         """验证配置是否正确"""
         exist_argument_list = ['user_id']

From 125039971cae1a4cb94527743682188c4f763173 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Mon, 11 May 2020 03:40:57 +0800
Subject: [PATCH 08/12] Handle retweet weibo

---
 hoshino/modules/weibo/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hoshino/modules/weibo/__init__.py b/hoshino/modules/weibo/__init__.py
index a76a925fc..ecc5dac18 100644
--- a/hoshino/modules/weibo/__init__.py
+++ b/hoshino/modules/weibo/__init__.py
@@ -38,7 +38,15 @@ def _load_config(services_config):
 _load_config(services_config)
 
 def wb_to_message(wb):
-    msg = f'@{wb["screen_name"]}:\n{wb["text"]}'
+    msg = f'@{wb["screen_name"]}'
+    if "retweet" in wb:
+        msg = f'{msg} 转发:\n{wb["text"]}\n======================'
+        wb = wb["retweet"]
+    else:
+        msg = f'{msg}:'
+
+    msg = f'{msg}\n{wb["text"]}'
+
     if sv.bot.config.IS_CQPRO and len(wb["pics"]) > 0:
         images_url = wb["pics"]
         msg = f'{msg}\n'

From ebf0d71d99d2e2438bfb22fd03db68d3939e32b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Mon, 11 May 2020 16:08:03 +0800
Subject: [PATCH 09/12] Users can fetch the latest 5 weibos forwardly by alias

---
 hoshino/modules/weibo/__init__.py  | 36 +++++++++++++++++++++++++++++-
 hoshino/modules/weibo/exception.py |  3 +++
 hoshino/modules/weibo/weibo.py     |  8 +++++++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/hoshino/modules/weibo/__init__.py b/hoshino/modules/weibo/__init__.py
index ecc5dac18..b75083417 100644
--- a/hoshino/modules/weibo/__init__.py
+++ b/hoshino/modules/weibo/__init__.py
@@ -2,6 +2,7 @@
 from hoshino.service import Service, Privilege as Priv
 from hoshino.res import R
 from hoshino import util
+from .exception import *
 
 '''
 sample config.json
@@ -11,6 +12,7 @@
     "enable_on_default": true,
     "users":[{
         "user_id": "6603867494",
+        "alias": ["公主连接", "公主连结", "公主链接"],
         "filter": true
     }]
     
@@ -21,20 +23,31 @@ def _load_config(services_config):
         sv.logger.debug(sv_config)
         service_name = sv_config["service_name"]
         enable_on_default = sv_config.get("enable_on_default", False)
+        
         users_config = sv_config["users"]
 
         sv_spider_list = []
         for user_config in users_config:
             wb_spider = WeiboSpider(user_config)
             sv_spider_list.append(wb_spider)
+            alias_list = user_config.get("alias", [])
+            for alias in alias_list:
+                if alias in alias_dic:
+                    raise DuplicateError(f"Alias {alias} is duplicate")
+                alias_dic[alias] = {
+                    "service_name":service_name, 
+                    "user_id":wb_spider.get_user_id()
+                    }
         
         subService = Service(service_name, enable_on_default=enable_on_default)
         subr_dic[service_name] = {"service": subService, "spiders": sv_spider_list}
 
-
+        
+        
 sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
 services_config = util.load_config(__file__)
 subr_dic = {}
+alias_dic = {}
 _load_config(services_config)
 
 def wb_to_message(wb):
@@ -59,6 +72,27 @@ def wb_to_message(wb):
         msg = f'{msg}\n视频链接：{res_videos}'
     return msg
 
+# @bot 看微博 alias
+@sv.on_command('看微博', only_to_me=True)
+async def get_last_5_weibo(session):
+    alias = session.current_arg_text
+    if alias not in alias_dic:
+        await session.finish(f"未找到微博: {alias}")
+        return
+    service_name = alias_dic[alias]["service_name"]
+    user_id = alias_dic[alias]["user_id"]
+
+    spiders = subr_dic[service_name]["spiders"]
+    for spider in spiders:
+        if spider.get_user_id() == user_id:
+            last_5_weibos = spider.get_last_5_weibos()
+            formatted_weibos = [wb_to_message(wb) for wb in last_5_weibos]
+            for wb in formatted_weibos:
+                await session.send(wb)
+            await session.finish(f"以上为 {alias} 的最新 {len(formatted_weibos)} 条微博")
+            return
+    await session.finish(f"未找到微博: {alias}")
+
 @sv.scheduled_job('interval', seconds=20*60)
 async def weibo_poller():
     for sv_name, serviceObj in subr_dic.items():
diff --git a/hoshino/modules/weibo/exception.py b/hoshino/modules/weibo/exception.py
index 5d1fab491..bda1619b8 100644
--- a/hoshino/modules/weibo/exception.py
+++ b/hoshino/modules/weibo/exception.py
@@ -18,4 +18,7 @@ class ParseError(WeiboError):
 
 
 class NotFoundError(WeiboError):
+    pass
+
+class DuplicateError(WeiboError):
     pass
\ No newline at end of file
diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 2dbf9cb70..a91a54280 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -20,6 +20,7 @@ def __init__(self, config):
         self.filter = config['filter'] 
         self.user_id = config['user_id']
         self.received_weibo_ids = []
+        self.last_5_weibos = []
         self.__recent = False
         asyncio.get_event_loop().run_until_complete(self._async_init())
     
@@ -301,6 +302,9 @@ def get_username(self):
     def get_user_id(self):
         return self.user_id
 
+    def get_last_5_weibos(self):
+        return self.last_5_weibos
+
     async def get_weibo_json(self, page):
         """获取网页中微博json数据"""
         params = {
@@ -406,6 +410,10 @@ async def get_latest_weibos(self):
                                 continue
                             if (not self.filter) or (
                                     'retweet' not in wb.keys()):
+                                if len(self.last_5_weibos) == 5:
+                                    self.last_5_weibos.pop(0)
+                                self.last_5_weibos.append(wb)
+
                                 latest_weibos.append(wb)
                                 self.received_weibo_ids.append(wb["id"])
                                 self.print_weibo(wb)

From f0f8941993b9d952377f8612bb16879d89249027 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Thu, 21 May 2020 21:35:08 +0800
Subject: [PATCH 10/12] Fix parsing logic for url in weibo text

---
 hoshino/modules/weibo/__init__.py |  9 ++++++
 hoshino/modules/weibo/weibo.py    | 50 ++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/hoshino/modules/weibo/__init__.py b/hoshino/modules/weibo/__init__.py
index b75083417..1b781b26e 100644
--- a/hoshino/modules/weibo/__init__.py
+++ b/hoshino/modules/weibo/__init__.py
@@ -18,6 +18,9 @@
     
 }]
 '''
+
+lmt = util.FreqLimiter(5)
+
 def _load_config(services_config):
     for sv_config in services_config:
         sv.logger.debug(sv_config)
@@ -70,11 +73,17 @@ def wb_to_message(wb):
         videos = wb["video_url"]
         res_videos = ';'.join(videos)
         msg = f'{msg}\n视频链接：{res_videos}'
+
     return msg
 
 # @bot 看微博 alias
 @sv.on_command('看微博', only_to_me=True)
 async def get_last_5_weibo(session):
+    uid = session.ctx['user_id']
+    if not lmt.check(uid):
+        session.finish('您查询得过于频繁，请稍等片刻', at_sender=True)
+    lmt.start_cd(uid)
+
     alias = session.current_arg_text
     if alias not in alias_dic:
         await session.finish(f"未找到微博: {alias}")
diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index a91a54280..65c8319f5 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -25,7 +25,10 @@ def __init__(self, config):
         asyncio.get_event_loop().run_until_complete(self._async_init())
     
     async def _async_init(self):
+        self.__init = True
         self.user = await self.get_user_info(self.user_id)
+        await self.get_latest_weibos()
+        self.__init = False
     
     async def get_json(self, params):
         """获取网页中json数据"""
@@ -158,16 +161,6 @@ def get_location(self, selector):
                     break
         return location
 
-    def get_article_url(self, selector):
-        """获取微博中头条文章的url"""
-        article_url = ''
-        text = selector.xpath('string(.)')
-        if text.startswith(u'发布了头条文章'):
-            url = selector.xpath('//a/@data-url')
-            if url and url[0].startswith('http://t.cn'):
-                article_url = url[0]
-        return article_url
-
     def get_topics(self, selector):
         """获取参与的微博话题"""
         span_list = selector.xpath("//span[@class='surl-text']")
@@ -193,6 +186,27 @@ def get_at_users(self, selector):
             at_users = ','.join(at_list)
         return at_users
 
+    def get_text(self, text_body):
+        """Return the plain text of a weibo's HTML body with link targets inlined.
+
+        For every <a data-url="..."> that carries a <span class="surl-text">
+        caption, the short url is appended to the caption in parentheses, e.g.
+        "<caption>" becomes "<caption>(http://t.cn/A622uDbW)".
+
+        text_body: HTML fragment taken from the 'text' field of a weibo.
+        Returns '' when the body is empty or lxml yields no document.
+        """
+        selector = etree.HTML(text_body) if text_body else None
+        if selector is None:
+            return ''
+        # Pair each url with the caption inside the SAME <a>; matching two flat
+        # lists by index breaks when a link has no surl-text span.
+        for anchor in selector.xpath('//a[@data-url]'):
+            captions = anchor.xpath('./span[@class="surl-text"]')
+            if captions:
+                captions[0].text = f"{captions[0].text or ''}({anchor.get('data-url')})"
+        return selector.xpath('string(.)')
+
     def string_to_int(self, string):
         """字符串转换为整数"""
         if isinstance(string, int):
@@ -217,11 +231,17 @@ def standardize_date(self, created_at):
             hour = created_at[:created_at.find(u"小时")]
             hour = timedelta(hours=int(hour))
             created_at = (datetime.now() - hour).strftime("%Y-%m-%d")
-            self.__recent = False
+            if self.__init:
+                self.__recent = True
+            else:
+                self.__recent = False
         elif u"昨天" in created_at:
             day = timedelta(days=1)
             created_at = (datetime.now() - day).strftime("%Y-%m-%d")
-            self.__recent = False
+            if self.__init:
+                self.__recent = True
+            else:
+                self.__recent = False
         elif created_at.count('-') == 1:
             year = datetime.now().strftime("%Y")
             created_at = year + "-" + created_at
@@ -250,8 +270,10 @@ def parse_weibo(self, weibo_info):
         weibo['bid'] = weibo_info['bid']
         text_body = weibo_info['text']
         selector = etree.HTML(text_body)
-        weibo['text'] = etree.HTML(text_body).xpath('string(.)')
-        weibo['article_url'] = self.get_article_url(selector)
+
+        
+        weibo['text'] = self.get_text(text_body)
+
         weibo['pics'] = self.get_pics(weibo_info)
         weibo['video_url'] = self.get_video_url(weibo_info)
         weibo['location'] = self.get_location(selector)

From c1dcef7330be571dc8cfabc06d69c7d2236bfdd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Thu, 21 May 2020 23:32:31 +0800
Subject: [PATCH 11/12] Add img of article to image list

---
 hoshino/modules/weibo/weibo.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 65c8319f5..254d2924f 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -111,6 +111,12 @@ def get_pics(self, weibo_info):
             pic_list = [pic['large']['url'] for pic in pic_info]
         else:
             pic_list = []
+
+        """获取文章封面图片url"""
+        if 'page_info' in weibo_info and weibo_info['page_info']['type'] == 'article':
+            if 'page_pic' in weibo_info['page_info']:
+                pic_list.append(weibo_info['page_info']['page_pic']['url'])
+
         return pic_list
 
     def get_live_photo(self, weibo_info):

From 466eedf394743fd1baf52a2b9355563b572b0f76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=A7=81=E6=80=9D?=
 <5361064+zzbslayer@user.noreply.gitee.com>
Date: Fri, 29 May 2020 03:07:40 +0800
Subject: [PATCH 12/12] Check weibo config from bot; other small fix

---
 hoshino/modules/weibo/__init__.py | 30 +++++++++++++++++++++++++-----
 hoshino/modules/weibo/weibo.py    | 12 ++++++++++--
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hoshino/modules/weibo/__init__.py b/hoshino/modules/weibo/__init__.py
index 1b781b26e..9b4ac21d2 100644
--- a/hoshino/modules/weibo/__init__.py
+++ b/hoshino/modules/weibo/__init__.py
@@ -8,7 +8,7 @@
 sample config.json
 
 [{
-    "service_name": "bcr-weibo",
+    "service_name": "weibo-bcr",
     "enable_on_default": true,
     "users":[{
         "user_id": "6603867494",
@@ -47,7 +47,7 @@ def _load_config(services_config):
 
         
         
-sv = Service('weibo-poller', use_priv=Priv.ADMIN, manage_priv=Priv.SUPERUSER, visible=False)
+sv = Service('weibo-poller', manage_priv=Priv.SUPERUSER, visible=False)
 services_config = util.load_config(__file__)
 subr_dic = {}
 alias_dic = {}
@@ -76,18 +76,38 @@ def wb_to_message(wb):
 
     return msg
 
+weibo_url_prefix = "https://weibo.com/u"
+@sv.on_command('weibo-config',aliases=('查看微博服务', '微博服务', '微博配置', '查看微博配置'))
+async def weibo_config(session):
+    """Reply with every configured feed as: service name, alias, Weibo URL."""
+    # Flatten the per-service user lists into (service name, user config) pairs.
+    entries = [
+        (svc['service_name'], user)
+        for svc in services_config
+        for user in svc['users']
+    ]
+    lines = ['微博推送配置：服务名，别名，微博链接']
+    for number, (name, user) in enumerate(entries, start=1):
+        uid, alias = user['user_id'], user['alias']
+        lines.append(f"{number}. {name}, {alias}, {weibo_url_prefix}/{uid}")
+    session.finish('\n'.join(lines))
+
+
 # @bot 看微博 alias
 @sv.on_command('看微博', only_to_me=True)
 async def get_last_5_weibo(session):
     uid = session.ctx['user_id']
     if not lmt.check(uid):
         session.finish('您查询得过于频繁，请稍等片刻', at_sender=True)
+        return
+
     lmt.start_cd(uid)
 
     alias = session.current_arg_text
     if alias not in alias_dic:
-        await session.finish(f"未找到微博: {alias}")
+        session.finish(f"未找到微博: {alias}")
         return
+
     service_name = alias_dic[alias]["service_name"]
     user_id = alias_dic[alias]["user_id"]
 
@@ -98,9 +118,9 @@ async def get_last_5_weibo(session):
             formatted_weibos = [wb_to_message(wb) for wb in last_5_weibos]
             for wb in formatted_weibos:
                 await session.send(wb)
-            await session.finish(f"以上为 {alias} 的最新 {len(formatted_weibos)} 条微博")
+            session.finish(f"以上为 {alias} 的最新 {len(formatted_weibos)} 条微博")
             return
-    await session.finish(f"未找到微博: {alias}")
+    session.finish(f"未找到微博: {alias}")
 
 @sv.scheduled_job('interval', seconds=20*60)
 async def weibo_poller():
diff --git a/hoshino/modules/weibo/weibo.py b/hoshino/modules/weibo/weibo.py
index 254d2924f..7d2e18183 100644
--- a/hoshino/modules/weibo/weibo.py
+++ b/hoshino/modules/weibo/weibo.py
@@ -87,7 +87,12 @@ async def get_user_info(self, user_id):
             return user
 
     def clear_buffer(self):
-        self.received_weibo_ids.clear()
+        """
+        Trim the cache of received weibo ids, keeping only the last 10 entries.
+        Known gap: if the account posts ~20 weibos at once just before a trim, only the
+        last 10 ids survive, so the first 10 may be pushed again (rare in practice).
+        """
+        self.received_weibo_ids = self.received_weibo_ids[-10:]
 
     def validate_config(self, config):
         """验证配置是否正确"""
@@ -251,7 +256,10 @@ def standardize_date(self, created_at):
         elif created_at.count('-') == 1:
             year = datetime.now().strftime("%Y")
             created_at = year + "-" + created_at
-            self.__recent = False
+            if self.__init:
+                self.__recent = True
+            else:
+                self.__recent = False
         return created_at
 
     def standardize_info(self, weibo):