Mirror of https://github.com/RYDE-WORK/MediaCrawler.git (synced 2026-02-05 16:36:44 +08:00)
Commit afbd4ec1bf
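All five hunks in this commit share one theme: a fragile crawler step (building a task list, reading a search page, paging sub-comments, extracting an author) is wrapped in a defensive guard, so a malformed response or a blocked user degrades into a logged message or an early exit instead of aborting the whole run.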
@@ -161,7 +161,13 @@ class BilibiliCrawler(AbstractCrawler):
                 video_list: List[Dict] = videos_res.get("result")

                 semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-                task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+                task_list = []
+                try:
+                    task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+                except Exception as e:
+                    utils.logger.warning(
+                        f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}"
+                    )
                 video_items = await asyncio.gather(*task_list)
                 for video_item in video_items:
                     if video_item:
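This guard protects the construction of the coroutine list, not the coroutines themselves: if video_list is None or an item is malformed, the comprehension raises before anything is awaited, the page is skipped with a warning, and asyncio.gather runs over whatever was collected (possibly nothing). A minimal, self-contained sketch of the pattern; fetch_video_info, fetch_page, and MAX_CONCURRENCY_NUM are simplified stand-ins for the crawler's get_video_info_task and config, not its real signatures:

import asyncio
from typing import Dict, List, Optional

MAX_CONCURRENCY_NUM = 4  # stand-in for config.MAX_CONCURRENCY_NUM

async def fetch_video_info(aid: int, semaphore: asyncio.Semaphore) -> Optional[Dict]:
    # Stand-in for BilibiliCrawler.get_video_info_task: fetch one video's detail.
    async with semaphore:
        await asyncio.sleep(0)  # placeholder for the real HTTP call
        return {"aid": aid}

async def fetch_page(video_list: Optional[List[Dict]]) -> List[Dict]:
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY_NUM)
    task_list = []
    try:
        # The comprehension itself can raise, e.g. when video_list is None;
        # the guard turns that into a warning and an empty task list.
        task_list = [fetch_video_info(aid=item.get("aid"), semaphore=semaphore)
                     for item in video_list]
    except Exception as e:
        print(f"error in the task list, page skipped: {e}")
    video_items = await asyncio.gather(*task_list)
    return [v for v in video_items if v]

if __name__ == "__main__":
    print(asyncio.run(fetch_page([{"aid": 1}, {"aid": 2}])))  # [{'aid': 1}, {'aid': 2}]
    print(asyncio.run(fetch_page(None)))                      # warning, then []

One caveat: if the comprehension fails partway through a real list, the coroutine objects already created are never awaited (Python warns about this at garbage collection), and exceptions raised inside the fetches themselves are not covered by the guard; they still surface from gather.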
@@ -199,7 +205,11 @@ class BilibiliCrawler(AbstractCrawler):
             video_list: List[Dict] = videos_res.get("result")

             semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-            task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+            task_list = []
+            try:
+                task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+            finally:
+                pass
             video_items = await asyncio.gather(*task_list)
             for video_item in video_items:
                 if video_item:
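The second call site gets a different guard: try/finally with an empty finally. Unlike the except above, this suppresses nothing; any exception from the comprehension still propagates to the caller after the (empty) cleanup runs, so the two hunks behave differently on bad input:

# try/finally does not swallow errors: this raises TypeError just as the
# unguarded code would, whereas the except-based guard logs and continues.
try:
    task_list = [item.get("aid") for item in None]  # iterating None -> TypeError
finally:
    pass  # cleanup hook only; the TypeError propagates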
@@ -108,6 +108,9 @@ class DouYinCrawler(AbstractCrawler):
                     publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
                     search_id=dy_search_id
                 )
+                if posts_res.get("data") is None or posts_res.get("data") == []:
+                    utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}")
+                    break
             except DataFetchError:
                 utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
                 break
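The new check ends the keyword's pagination as soon as Douyin returns a missing or empty data field. A small sketch of the predicate; page_is_empty is my name for it, not the crawler's:

from typing import Any, Dict

def page_is_empty(posts_res: Dict[str, Any]) -> bool:
    # Mirrors the hunk's condition: a missing or empty "data" field ends pagination.
    data = posts_res.get("data")
    return data is None or data == []

assert page_is_empty({})            # no "data" key at all
assert page_is_empty({"data": []})  # explicit empty list
assert not page_is_empty({"data": [{"aweme_id": "1"}]})

Since both branches are falsy, "if not posts_res.get('data'):" would be the shorter equivalent, at the cost of also treating 0, "" or {} as empty.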
@@ -415,6 +415,12 @@ class XiaoHongShuClient(AbstractApiClient):
                 num=10,
                 cursor=sub_comment_cursor,
             )
+
+            if comments_res is None:
+                utils.logger.info(
+                    f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}"
+                )
+                continue
             sub_comment_has_more = comments_res.get("has_more", False)
             sub_comment_cursor = comments_res.get("cursor", "")
             if "comments" not in comments_res:
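The guard presumably sits inside the client's "while sub_comment_has_more:" cursor walk (the loop itself is outside this hunk). Because "continue" skips the cursor update, a persistently bad endpoint would re-request the same cursor forever; the sketch below reproduces the guard and adds a retry cap on top, with every name other than the guard being a simplified stand-in:

import asyncio
from typing import Any, Dict, List, Optional

async def fetch_sub_comments(cursor: str) -> Optional[Dict[str, Any]]:
    # Stand-in for the client's sub-comment request; returns None on a bad response.
    return {"has_more": False, "cursor": "", "comments": [{"id": "c1"}]}

async def walk_sub_comments(max_none_retries: int = 3) -> List[Dict]:
    result: List[Dict] = []
    cursor, has_more, none_seen = "", True, 0
    while has_more:
        res = await fetch_sub_comments(cursor)
        if res is None:                        # the hunk's guard
            none_seen += 1
            if none_seen >= max_none_retries:  # retry cap: my addition, not in the commit
                break
            continue                           # re-request the same cursor
        has_more = res.get("has_more", False)
        cursor = res.get("cursor", "")
        if "comments" not in res:
            continue
        result.extend(res["comments"])
    return result

print(asyncio.run(walk_sub_comments()))  # [{'id': 'c1'}]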
@@ -192,15 +192,21 @@ class ZhihuExtractor:
         """
         res = ZhihuCreator()
-        if not author:
-            return res
-        if not author.get("id"):
-            author = author.get("member")
-        res.user_id = author.get("id")
-        res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}"
-        res.user_nickname = author.get("name")
-        res.user_avatar = author.get("avatar_url")
-        res.url_token = author.get("url_token")
+        try:
+            if not author:
+                return res
+            if not author.get("id"):
+                author = author.get("member")
+            res.user_id = author.get("id")
+            res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}"
+            res.user_nickname = author.get("name")
+            res.user_avatar = author.get("avatar_url")
+            res.url_token = author.get("url_token")
+

+        except Exception as e:
+            utils.logger.warning(
+                f"[ZhihuExtractor._extract_content_or_comment_author] User Maybe Blocked. {e}"
+            )
         return res

     def extract_comments(self, page_content: ZhihuContent, comments: List[Dict]) -> List[ZhihuComment]:
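The concrete failure this guards against is visible in the moved lines: for a blocked user, author.get("member") can return None, and the next author.get("id") then raises AttributeError. A trimmed, runnable sketch of the guarded extractor; the two-field ZhihuCreator is a stand-in for the project's model:

from dataclasses import dataclass
from typing import Optional

@dataclass
class ZhihuCreator:
    # Trimmed stand-in for the project's model: just two of its fields.
    user_id: str = ""
    user_nickname: str = ""

def extract_author(author: Optional[dict]) -> ZhihuCreator:
    res = ZhihuCreator()
    try:
        if not author:
            return res
        if not author.get("id"):
            author = author.get("member")   # None for a blocked user
        res.user_id = author.get("id")      # AttributeError when author is None
        res.user_nickname = author.get("name")
    except Exception as e:
        print(f"user maybe blocked: {e}")   # the commit logs a warning here
    return res

print(extract_author({"member": None}))           # empty ZhihuCreator, no crash
print(extract_author({"id": "42", "name": "a"}))  # populated ZhihuCreator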