From 79bf9fc05d60221f5bcd6152771be102e2b49b36 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Thu, 26 Dec 2024 18:28:23 +0800 Subject: [PATCH] chore: add xhs field comment for issue #526 --- store/xhs/__init__.py | 88 +++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/store/xhs/__init__.py b/store/xhs/__init__.py index a31389e..eb0a14e 100644 --- a/store/xhs/__init__.py +++ b/store/xhs/__init__.py @@ -87,27 +87,27 @@ async def update_xhs_note(note_item: Dict): video_url = ','.join(get_video_url_arr(note_item)) local_db_item = { - "note_id": note_item.get("note_id"), - "type": note_item.get("type"), - "title": note_item.get("title") or note_item.get("desc", "")[:255], - "desc": note_item.get("desc", ""), - "video_url": video_url, - "time": note_item.get("time"), - "last_update_time": note_item.get("last_update_time", 0), - "user_id": user_info.get("user_id"), - "nickname": user_info.get("nickname"), - "avatar": user_info.get("avatar"), - "liked_count": interact_info.get("liked_count"), - "collected_count": interact_info.get("collected_count"), - "comment_count": interact_info.get("comment_count"), - "share_count": interact_info.get("share_count"), - "ip_location": note_item.get("ip_location", ""), - "image_list": ','.join([img.get('url', '') for img in image_list]), - "tag_list": ','.join([tag.get('name', '') for tag in tag_list if tag.get('type') == 'topic']), - "last_modify_ts": utils.get_current_timestamp(), - "note_url": f"https://www.xiaohongshu.com/explore/{note_id}?xsec_token={note_item.get('xsec_token')}&xsec_source=pc_search", - "source_keyword": source_keyword_var.get(), - "xsec_token": note_item.get("xsec_token"), + "note_id": note_item.get("note_id"), # 帖子id + "type": note_item.get("type"), # 帖子类型 + "title": note_item.get("title") or note_item.get("desc", "")[:255], # 帖子标题 + "desc": note_item.get("desc", ""), # 帖子描述 + "video_url": video_url, # 帖子视频url + "time": note_item.get("time"), # 帖子发布时间 + "last_update_time": note_item.get("last_update_time", 0), # 帖子最后更新时间 + "user_id": user_info.get("user_id"), # 用户id + "nickname": user_info.get("nickname"), # 用户昵称 + "avatar": user_info.get("avatar"), # 用户头像 + "liked_count": interact_info.get("liked_count"), # 点赞数 + "collected_count": interact_info.get("collected_count"), # 收藏数 + "comment_count": interact_info.get("comment_count"), # 评论数 + "share_count": interact_info.get("share_count"), # 分享数 + "ip_location": note_item.get("ip_location", ""), # ip地址 + "image_list": ','.join([img.get('url', '') for img in image_list]), # 图片url + "tag_list": ','.join([tag.get('name', '') for tag in tag_list if tag.get('type') == 'topic']), # 标签 + "last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间) + "note_url": f"https://www.xiaohongshu.com/explore/{note_id}?xsec_token={note_item.get('xsec_token')}&xsec_source=pc_search", # 帖子url + "source_keyword": source_keyword_var.get(), # 搜索关键词 + "xsec_token": note_item.get("xsec_token"), # xsec_token } utils.logger.info(f"[store.xhs.update_xhs_note] xhs note: {local_db_item}") await XhsStoreFactory.create_store().store_content(local_db_item) @@ -144,18 +144,18 @@ async def update_xhs_note_comment(note_id: str, comment_item: Dict): comment_pictures = [item.get("url_default", "") for item in comment_item.get("pictures", [])] target_comment = comment_item.get("target_comment", {}) local_db_item = { - "comment_id": comment_id, - "create_time": comment_item.get("create_time"), - "ip_location": comment_item.get("ip_location"), - "note_id": note_id, - "content": comment_item.get("content"), - "user_id": user_info.get("user_id"), - "nickname": user_info.get("nickname"), - "avatar": user_info.get("image"), - "sub_comment_count": comment_item.get("sub_comment_count", 0), - "pictures": ",".join(comment_pictures), - "parent_comment_id": target_comment.get("id", 0), - "last_modify_ts": utils.get_current_timestamp(), + "comment_id": comment_id, # 评论id + "create_time": comment_item.get("create_time"), # 评论时间 + "ip_location": comment_item.get("ip_location"), # ip地址 + "note_id": note_id, # 帖子id + "content": comment_item.get("content"), # 评论内容 + "user_id": user_info.get("user_id"), # 用户id + "nickname": user_info.get("nickname"), # 用户昵称 + "avatar": user_info.get("image"), # 用户头像 + "sub_comment_count": comment_item.get("sub_comment_count", 0), # 子评论数 + "pictures": ",".join(comment_pictures), # 评论图片 + "parent_comment_id": target_comment.get("id", 0), # 父评论id + "last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间) "like_count": comment_item.get("like_count", 0), } utils.logger.info(f"[store.xhs.update_xhs_note_comment] xhs note comment:{local_db_item}") @@ -186,18 +186,18 @@ async def save_creator(user_id: str, creator: Dict): interaction = i.get('count') local_db_item = { - 'user_id': user_id, - 'nickname': user_info.get('nickname'), - 'gender': '女' if user_info.get('gender') == 1 else '男', - 'avatar': user_info.get('images'), - 'desc': user_info.get('desc'), - 'ip_location': user_info.get('ipLocation'), - 'follows': follows, - 'fans': fans, - 'interaction': interaction, + 'user_id': user_id, # 用户id + 'nickname': user_info.get('nickname'), # 昵称 + 'gender': '女' if user_info.get('gender') == 1 else '男', # 性别 + 'avatar': user_info.get('images'), # 头像 + 'desc': user_info.get('desc'), # 个人描述 + 'ip_location': user_info.get('ipLocation'), # ip地址 + 'follows': follows, # 关注数 + 'fans': fans, # 粉丝数 + 'interaction': interaction, # 互动数 'tag_list': json.dumps({tag.get('tagType'): tag.get('name') for tag in creator.get('tags')}, - ensure_ascii=False), - "last_modify_ts": utils.get_current_timestamp(), + ensure_ascii=False), # 标签 + "last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间) } utils.logger.info(f"[store.xhs.save_creator] creator:{local_db_item}") await XhsStoreFactory.create_store().store_creator(local_db_item)