From 6b6e2b8ba0f83f98ffa9719acb2336ac92ed144e Mon Sep 17 00:00:00 2001 From: crpa33 Date: Thu, 27 Mar 2025 23:18:01 +0800 Subject: [PATCH 1/7] =?UTF-8?q?=E4=BF=AE=E5=A4=8DNoneType=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E7=9A=84=E6=8E=A8=E5=AF=BC=E5=BC=8F=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/bilibili/core.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index de2809a..5b8624b 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -161,7 +161,11 @@ class BilibiliCrawler(AbstractCrawler): video_list: List[Dict] = videos_res.get("result") semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) - task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + task_list = [] + try: + task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + finally: + pass video_items = await asyncio.gather(*task_list) for video_item in video_items: if video_item: @@ -199,7 +203,11 @@ class BilibiliCrawler(AbstractCrawler): video_list: List[Dict] = videos_res.get("result") semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) - task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + task_list = [] + try: + task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + finally: + pass video_items = await asyncio.gather(*task_list) for video_item in video_items: if video_item: From 3c72fc48b030862a4e98154d79244abc53616760 Mon Sep 17 00:00:00 2001 From: crpa33 Date: Thu, 27 Mar 2025 23:22:47 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E4=BF=9D=E6=8A=A4author=E4=B8=BANone?= =?UTF-8?q?=E4=BD=86=E6=9C=AA=E8=A2=AB=E8=AF=86=E5=88=AB=E7=9A=84=E6=83=85?= =?UTF-8?q?=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/zhihu/help.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/media_platform/zhihu/help.py b/media_platform/zhihu/help.py index b1454b2..34381e3 100644 --- a/media_platform/zhihu/help.py +++ b/media_platform/zhihu/help.py @@ -192,15 +192,18 @@ class ZhihuExtractor: """ res = ZhihuCreator() - if not author: - return res - if not author.get("id"): - author = author.get("member") - res.user_id = author.get("id") - res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}" - res.user_nickname = author.get("name") - res.user_avatar = author.get("avatar_url") - res.url_token = author.get("url_token") + try: + if not author: + return res + if not author.get("id"): + author = author.get("member") + res.user_id = author.get("id") + res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}" + res.user_nickname = author.get("name") + res.user_avatar = author.get("avatar_url") + res.url_token = author.get("url_token") + finally: + pass return res def extract_comments(self, page_content: ZhihuContent, comments: List[Dict]) -> List[ZhihuComment]: From 2c4af2337ed356e7d70aabf9356743c3f309ca49 Mon Sep 17 00:00:00 2001 From: crpa33 Date: Thu, 27 Mar 2025 23:32:21 +0800 Subject: [PATCH 3/7] =?UTF-8?q?douyin=E6=90=9C=E7=B4=A2=E9=A1=B5=E4=B8=BA?= =?UTF-8?q?=E7=A9=BA=E8=B7=B3=E4=B8=8B=E4=B8=80=E5=85=B3=E9=94=AE=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 预计页数没到,空了也跳 --- media_platform/douyin/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py index 426b33b..78c2a90 100644 --- a/media_platform/douyin/core.py +++ b/media_platform/douyin/core.py @@ -108,6 +108,9 @@ class DouYinCrawler(AbstractCrawler): publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE), search_id=dy_search_id ) + if posts_res.get("data") is None or posts_res.get("data") == []: + utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}`") + break except DataFetchError: utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed") break From eaf14721f8f355ffde9a076f175715cceb71a688 Mon Sep 17 00:00:00 2001 From: crpa33 Date: Wed, 2 Apr 2025 11:48:36 +0800 Subject: [PATCH 4/7] =?UTF-8?q?=E8=BE=93=E5=87=BA=E5=88=B0=E6=97=A5?= =?UTF-8?q?=E5=BF=97-NoneType=E5=AF=BC=E8=87=B4=E7=9A=84=E6=8E=A8=E5=AF=BC?= =?UTF-8?q?=E5=BC=8F=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/bilibili/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 5b8624b..cf6cde6 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -164,8 +164,10 @@ class BilibiliCrawler(AbstractCrawler): task_list = [] try: task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] - finally: - pass + except Exception as e : + utils.logger.error( + f"[BilibiliCrawler.search] {comments_res}" + ) video_items = await asyncio.gather(*task_list) for video_item in video_items: if video_item: From 413d91a520ae413707e2781b40edb20d5c70d347 Mon Sep 17 00:00:00 2001 From: crpa33 Date: Wed, 2 Apr 2025 11:52:36 +0800 Subject: [PATCH 5/7] =?UTF-8?q?=E8=BE=93=E5=87=BA=E5=88=B0=E6=97=A5?= =?UTF-8?q?=E5=BF=97-author=E8=A2=AB=E5=B0=81=E7=A6=81=E6=88=96=E5=AD=98?= =?UTF-8?q?=E5=9C=A8=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/zhihu/help.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/media_platform/zhihu/help.py b/media_platform/zhihu/help.py index 34381e3..1513429 100644 --- a/media_platform/zhihu/help.py +++ b/media_platform/zhihu/help.py @@ -202,8 +202,11 @@ class ZhihuExtractor: res.user_nickname = author.get("name") res.user_avatar = author.get("avatar_url") res.url_token = author.get("url_token") - finally: - pass + + except Exception as e : + utils.logger.warning( + f"[ZhihuExtractor._extract_content_or_comment_author] User Maybe Blocked. {e}" + ) return res def extract_comments(self, page_content: ZhihuContent, comments: List[Dict]) -> List[ZhihuComment]: From a39b571d271328cad69b03b7a640ec7499cc9727 Mon Sep 17 00:00:00 2001 From: crpa33 Date: Wed, 2 Apr 2025 11:57:28 +0800 Subject: [PATCH 6/7] =?UTF-8?q?=E8=BE=93=E5=87=BA=E5=88=B0=E6=97=A5?= =?UTF-8?q?=E5=BF=97-=E5=A4=84=E7=90=86=E8=A7=86=E9=A2=91=E6=90=9C?= =?UTF-8?q?=E7=B4=A2=E9=A1=B5=E4=BB=BB=E5=8A=A1=E5=88=97=E8=A1=A8=E6=9E=84?= =?UTF-8?q?=E9=80=A0=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/bilibili/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index cf6cde6..649fe19 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -165,8 +165,8 @@ class BilibiliCrawler(AbstractCrawler): try: task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] except Exception as e : - utils.logger.error( - f"[BilibiliCrawler.search] {comments_res}" + utils.logger.warning( + f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}" ) video_items = await asyncio.gather(*task_list) for video_item in video_items: From 274d64aefcba42bb4fa02989b4a0c6aec80657d6 Mon Sep 17 00:00:00 2001 From: crpa33 Date: Wed, 2 Apr 2025 11:59:27 +0800 Subject: [PATCH 7/7] =?UTF-8?q?=E5=A4=84=E7=90=86xhs=E6=84=8F=E5=A4=96?= =?UTF-8?q?=E7=9A=84=E8=AF=84=E8=AE=BA=E4=BF=A1=E6=81=AF=E4=B8=BA=E7=A9=BA?= =?UTF-8?q?=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 报错就会打断我,我没辙 --- media_platform/xhs/client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/media_platform/xhs/client.py b/media_platform/xhs/client.py index f3f0828..c1757f2 100644 --- a/media_platform/xhs/client.py +++ b/media_platform/xhs/client.py @@ -415,6 +415,12 @@ class XiaoHongShuClient(AbstractApiClient): num=10, cursor=sub_comment_cursor, ) + + if comments_res is None: + utils.logger.info( + f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}" + ) + continue sub_comment_has_more = comments_res.get("has_more", False) sub_comment_cursor = comments_res.get("cursor", "") if "comments" not in comments_res: