Merge pull request #572 from crpa33/main

避免预料之外的数据为None的情况终止进程 (Avoid terminating the process when data is unexpectedly None)
This commit is contained in:
程序员阿江(Relakkes) 2025-04-02 13:34:41 +08:00 committed by GitHub
commit afbd4ec1bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 36 additions and 11 deletions

View File

@ -161,7 +161,13 @@ class BilibiliCrawler(AbstractCrawler):
video_list: List[Dict] = videos_res.get("result")
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list = []
try:
task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
except Exception as e :
utils.logger.warning(
f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}"
)
video_items = await asyncio.gather(*task_list)
for video_item in video_items:
if video_item:
@ -199,7 +205,11 @@ class BilibiliCrawler(AbstractCrawler):
video_list: List[Dict] = videos_res.get("result")
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list = []
try:
task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
finally:
pass
video_items = await asyncio.gather(*task_list)
for video_item in video_items:
if video_item:

View File

@ -108,6 +108,9 @@ class DouYinCrawler(AbstractCrawler):
publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
search_id=dy_search_id
)
if posts_res.get("data") is None or posts_res.get("data") == []:
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}`")
break
except DataFetchError:
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
break

View File

@ -415,6 +415,12 @@ class XiaoHongShuClient(AbstractApiClient):
num=10,
cursor=sub_comment_cursor,
)
if comments_res is None:
utils.logger.info(
f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}"
)
continue
sub_comment_has_more = comments_res.get("has_more", False)
sub_comment_cursor = comments_res.get("cursor", "")
if "comments" not in comments_res:

View File

@ -192,6 +192,7 @@ class ZhihuExtractor:
"""
res = ZhihuCreator()
try:
if not author:
return res
if not author.get("id"):
@ -201,6 +202,11 @@ class ZhihuExtractor:
res.user_nickname = author.get("name")
res.user_avatar = author.get("avatar_url")
res.url_token = author.get("url_token")
except Exception as e :
utils.logger.warning(
f"[ZhihuExtractor._extract_content_or_comment_author] User Maybe Blocked. {e}"
)
return res
def extract_comments(self, page_content: ZhihuContent, comments: List[Dict]) -> List[ZhihuComment]: