From 68a60faa7f37d9545f05af69c07626eb4ec25313 Mon Sep 17 00:00:00 2001 From: leantli Date: Thu, 4 Apr 2024 00:11:22 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20=E7=AE=80=E5=8C=96=E5=88=A4=E6=96=AD?= =?UTF-8?q?=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/bilibili/core.py | 4 +++- media_platform/douyin/core.py | 4 +++- media_platform/kuaishou/core.py | 4 +++- media_platform/weibo/core.py | 4 +++- media_platform/xhs/core.py | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 5c31b16..758ae0f 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -93,7 +93,9 @@ class BilibiliCrawler(AbstractCrawler): :return: """ utils.logger.info("[BilibiliCrawler.search] Begin search bilibli keywords") - bili_limit_count = min(20, max(1, config.CRAWLER_MAX_NOTES_COUNT)) # bilibili limit page fixed value + bili_limit_count =20 # bilibili limit page fixed value + if config.CRAWLER_MAX_NOTES_COUNT < bili_limit_count: + config.CRAWLER_MAX_NOTES_COUNT = bili_limit_count for keyword in config.KEYWORDS.split(","): utils.logger.info(f"[BilibiliCrawler.search] Current search keyword: {keyword}") page = 1 diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py index e21a605..50c2a18 100644 --- a/media_platform/douyin/core.py +++ b/media_platform/douyin/core.py @@ -81,10 +81,12 @@ class DouYinCrawler(AbstractCrawler): async def search(self) -> None: utils.logger.info("[DouYinCrawler.search] Begin search douyin keywords") + dy_limit_count = 10 # douyin limit page fixed value + if config.CRAWLER_MAX_NOTES_COUNT < dy_limit_count: + config.CRAWLER_MAX_NOTES_COUNT = dy_limit_count for keyword in config.KEYWORDS.split(","): utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}") aweme_list: List[str] = [] - dy_limit_count = min(10, max(1, config.CRAWLER_MAX_NOTES_COUNT)) page = 0 while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT: try: diff --git a/media_platform/kuaishou/core.py b/media_platform/kuaishou/core.py index 4ec396f..4eb7796 100644 --- a/media_platform/kuaishou/core.py +++ b/media_platform/kuaishou/core.py @@ -85,7 +85,9 @@ class KuaishouCrawler(AbstractCrawler): async def search(self): utils.logger.info("[KuaishouCrawler.search] Begin search kuaishou keywords") - ks_limit_count = min(20, max(1, config.CRAWLER_MAX_NOTES_COUNT)) # kuaishou limit page fixed value + ks_limit_count = 20 # kuaishou limit page fixed value + if config.CRAWLER_MAX_NOTES_COUNT < ks_limit_count: + config.CRAWLER_MAX_NOTES_COUNT = ks_limit_count for keyword in config.KEYWORDS.split(","): utils.logger.info(f"[KuaishouCrawler.search] Current search keyword: {keyword}") page = 1 diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py index 42fb7e5..2ec0eae 100644 --- a/media_platform/weibo/core.py +++ b/media_platform/weibo/core.py @@ -104,7 +104,9 @@ class WeiboCrawler(AbstractCrawler): :return: """ utils.logger.info("[WeiboCrawler.search] Begin search weibo keywords") - weibo_limit_count = min(10, max(1, config.CRAWLER_MAX_NOTES_COUNT)) + weibo_limit_count = 10 # weibo limit page fixed value + if config.CRAWLER_MAX_NOTES_COUNT < weibo_limit_count: + config.CRAWLER_MAX_NOTES_COUNT = weibo_limit_count for keyword in config.KEYWORDS.split(","): utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}") page = 1 diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 2855af8..5b67d6d 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -96,7 +96,9 @@ class XiaoHongShuCrawler(AbstractCrawler): async def search(self) -> None: """Search for notes and retrieve their comment information.""" utils.logger.info("[XiaoHongShuCrawler.search] Begin search xiaohongshu keywords") - xhs_limit_count = min(20, max(1, config.CRAWLER_MAX_NOTES_COUNT)) # xhs limit page fixed value + xhs_limit_count = 20 # xhs limit page fixed value + if config.CRAWLER_MAX_NOTES_COUNT < xhs_limit_count: + config.CRAWLER_MAX_NOTES_COUNT = xhs_limit_count for keyword in config.KEYWORDS.split(","): utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}") page = 1