From 0118621a7998ae68d10f569bb6a8d6dbe93e5ee4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BF=9F=E6=8C=81=E6=B1=9F?= <129171955+2513502304@users.noreply.github.com> Date: Fri, 10 Jan 2025 19:20:01 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E5=BE=AE=E5=8D=9A=E8=AF=84=E8=AE=BA?= =?UTF-8?q?=E7=88=AC=E5=8F=96=E5=87=BD=E6=95=B0get=5Fnote=5Fall=5Fcomments?= =?UTF-8?q?=E7=9A=84max=5Fid=5Ftype=E5=8F=98=E4=B8=BA=E5=8F=AF=E5=8F=98?= =?UTF-8?q?=E8=AF=B7=E6=B1=82=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 除了原先的max_id参数外,max_id_type参数也从上一次api的返回结果中解析得到,初始为0,但随着获取的评论越来越多,会更改为1。 此外,修改了WeiboClient类的request函数,将返回的ok_code异常处理进行了优化,细分为0,1,else...。这样即便获取到的max_id和max_id_type为None,也不会触发像'>' not supported between instances of 'NoneType' and 'int'这样模棱两可的异常提示,方便溯源问题所在,即api响应错误。 对于评论的数据获取不全的情况,在浏览器中显示的评论数量为1000+,此次提交前程序获取到308条,提交后为319条,使用网页端打开手动刷评论的最后一条和程序获取的最后一条内容一致,可能是微博默认开启的精选评论功能导致无法获取所有的评论... --- media_platform/weibo/client.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/media_platform/weibo/client.py b/media_platform/weibo/client.py index e1deec7..d94be39 100644 --- a/media_platform/weibo/client.py +++ b/media_platform/weibo/client.py @@ -63,10 +63,13 @@ class WeiboClient: data: Dict = response.json() ok_code = data.get("ok") - if ok_code not in [0, 1]: + if ok_code == 0: # response error utils.logger.error(f"[WeiboClient.request] request {method}:{url} err, res:{data}") - raise DataFetchError(data.get("msg", "unkonw error")) - else: + raise DataFetchError(data.get("msg", "response error")) + elif ok_code != 1: # unknown error + utils.logger.error(f"[WeiboClient.request] request {method}:{url} err, res:{data}") + raise DataFetchError(data.get("msg", "unknown error")) + else: # response right return data.get("data", {}) async def get(self, uri: str, params=None, headers=None, **kwargs) -> Union[Response, Dict]: @@ -127,31 +130,34 @@ class WeiboClient: } return 
await self.get(uri, params) - async def get_note_comments(self, mid_id: str, max_id: int) -> Dict: + async def get_note_comments(self, mid_id: str, max_id: int, max_id_type: int = 0) -> Dict: """get notes comments :param mid_id: 微博ID :param max_id: 分页参数ID + :param max_id_type: 分页参数ID类型 :return: """ uri = "/comments/hotflow" params = { "id": mid_id, "mid": mid_id, - "max_id_type": 0, + "max_id_type": max_id_type, } if max_id > 0: params.update({"max_id": max_id}) - referer_url = f"https://m.weibo.cn/detail/{mid_id}" headers = copy.copy(self.headers) headers["Referer"] = referer_url return await self.get(uri, params, headers=headers) - async def get_note_all_comments(self, note_id: str, crawl_interval: float = 1.0, - callback: Optional[Callable] = None, - max_count: int = 10, - ): + async def get_note_all_comments( + self, + note_id: str, + crawl_interval: float = 1.0, + callback: Optional[Callable] = None, + max_count: int = 10, + ): """ get note all comments include sub comments :param note_id: @@ -160,13 +166,14 @@ class WeiboClient: :param max_count: :return: """ - result = [] is_end = False max_id = -1 + max_id_type = 0 while not is_end and len(result) < max_count: - comments_res = await self.get_note_comments(note_id, max_id) + comments_res = await self.get_note_comments(note_id, max_id, max_id_type) max_id: int = comments_res.get("max_id") + max_id_type: int = comments_res.get("max_id_type") comment_list: List[Dict] = comments_res.get("data", []) is_end = max_id == 0 if len(result) + len(comment_list) > max_count: