diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index de2809a..649fe19 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -161,7 +161,13 @@ class BilibiliCrawler(AbstractCrawler): video_list: List[Dict] = videos_res.get("result") semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) - task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + task_list = [] + try: + task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + except Exception as e : + utils.logger.warning( + f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}" + ) video_items = await asyncio.gather(*task_list) for video_item in video_items: if video_item: @@ -199,7 +205,11 @@ class BilibiliCrawler(AbstractCrawler): video_list: List[Dict] = videos_res.get("result") semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) - task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + task_list = [] + try: + task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list] + finally: + pass video_items = await asyncio.gather(*task_list) for video_item in video_items: if video_item: diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py index 426b33b..78c2a90 100644 --- a/media_platform/douyin/core.py +++ b/media_platform/douyin/core.py @@ -108,6 +108,9 @@ class DouYinCrawler(AbstractCrawler): publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE), search_id=dy_search_id ) + if posts_res.get("data") is None or posts_res.get("data") == []: + utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}`") + break except DataFetchError: utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed") break diff --git a/media_platform/xhs/client.py b/media_platform/xhs/client.py index f3f0828..c1757f2 100644 --- a/media_platform/xhs/client.py +++ b/media_platform/xhs/client.py @@ -415,6 +415,12 @@ class XiaoHongShuClient(AbstractApiClient): num=10, cursor=sub_comment_cursor, ) + + if comments_res is None: + utils.logger.info( + f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}" + ) + continue sub_comment_has_more = comments_res.get("has_more", False) sub_comment_cursor = comments_res.get("cursor", "") if "comments" not in comments_res: diff --git a/media_platform/zhihu/help.py b/media_platform/zhihu/help.py index b1454b2..1513429 100644 --- a/media_platform/zhihu/help.py +++ b/media_platform/zhihu/help.py @@ -192,15 +192,21 @@ class ZhihuExtractor: """ res = ZhihuCreator() - if not author: - return res - if not author.get("id"): - author = author.get("member") - res.user_id = author.get("id") - res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}" - res.user_nickname = author.get("name") - res.user_avatar = author.get("avatar_url") - res.url_token = author.get("url_token") + try: + if not author: + return res + if not author.get("id"): + author = author.get("member") + res.user_id = author.get("id") + res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}" + res.user_nickname = author.get("name") + res.user_avatar = author.get("avatar_url") + res.url_token = author.get("url_token") + + except Exception as e : + utils.logger.warning( + f"[ZhihuExtractor._extract_content_or_comment_author] User Maybe Blocked. {e}" + ) return res def extract_comments(self, page_content: ZhihuContent, comments: List[Dict]) -> List[ZhihuComment]: