feat: 微博二维码登录done

This commit is contained in:
Relakkes 2023-12-30 18:54:21 +08:00
parent 27a2041929
commit 38d6f10bf0
6 changed files with 69 additions and 25 deletions

View File

@ -23,8 +23,8 @@
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| B 站 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | B 站 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| 微博 | ✅ | | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | 微博 | ✅ | | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
## 使用方法 ## 使用方法

View File

@ -14,8 +14,8 @@ IP_PROXY_POOL_COUNT = 2
# 重试时间 # 重试时间
RETRY_INTERVAL = 60 * 30 # 30 minutes RETRY_INTERVAL = 60 * 30 # 30 minutes
# 无头浏览器的标识True:开启 False 关闭(会打开一个浏览器 # 设置为True不会打开浏览器无头浏览器设置False会打开一个浏览器小红书如果一直扫码登录不通过打开浏览器手动过一下滑动验证码
HEADLESS = False HEADLESS = True
# 是否保存登录状态 # 是否保存登录状态
SAVE_LOGIN_STATE = True SAVE_LOGIN_STATE = True

View File

@ -69,9 +69,12 @@ class WeiboClient:
utils.logger.info("[WeiboClient.pong] Begin pong weibo...") utils.logger.info("[WeiboClient.pong] Begin pong weibo...")
ping_flag = False ping_flag = False
try: try:
pass uri = "/api/config"
resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
if resp_data.get("login"):
ping_flag = True
except Exception as e: except Exception as e:
utils.logger.error(f"[BilibiliClient.pong] Pong weibo failed: {e}, and try to login again...") utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
ping_flag = False ping_flag = False
return ping_flag return ping_flag

View File

@ -7,9 +7,8 @@
import asyncio import asyncio
import os import os
import random import random
import time
from asyncio import Task from asyncio import Task
from typing import Dict, List, Optional, Tuple, Union from typing import Dict, List, Optional, Tuple
from playwright.async_api import (BrowserContext, BrowserType, Page, from playwright.async_api import (BrowserContext, BrowserType, Page,
async_playwright) async_playwright)
@ -19,7 +18,7 @@ from base.base_crawler import AbstractCrawler
from models import weibo from models import weibo
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
from tools import utils from tools import utils
from var import comment_tasks_var, crawler_type_var from var import crawler_type_var
from .client import WeiboClient from .client import WeiboClient
from .exception import DataFetchError from .exception import DataFetchError
@ -37,8 +36,8 @@ class WeiboCrawler(AbstractCrawler):
browser_context: BrowserContext browser_context: BrowserContext
def __init__(self): def __init__(self):
self.index_url = "https://m.weibo.cn" self.index_url = "https://www.weibo.com"
self.user_agent = utils.get_mobile_user_agent() self.user_agent = utils.get_user_agent()
def init_config(self, platform: str, login_type: str, crawler_type: str): def init_config(self, platform: str, login_type: str, crawler_type: str):
self.platform = platform self.platform = platform
@ -215,7 +214,7 @@ class WeiboCrawler(AbstractCrawler):
weibo_client_obj = WeiboClient( weibo_client_obj = WeiboClient(
proxies=httpx_proxy, proxies=httpx_proxy,
headers={ headers={
"User-Agent": self.user_agent, "User-Agent": utils.get_mobile_user_agent(),
"Cookie": cookie_str, "Cookie": cookie_str,
"Origin": "https://m.weibo.cn", "Origin": "https://m.weibo.cn",
"Referer": "https://m.weibo.cn", "Referer": "https://m.weibo.cn",

View File

@ -32,7 +32,7 @@ class WeiboLogin(AbstractLogin):
async def begin(self): async def begin(self):
"""Start login weibo""" """Start login weibo"""
utils.logger.info("[WeiboLogin.begin] Begin login Bilibili ...") utils.logger.info("[WeiboLogin.begin] Begin login weibo ...")
if self.login_type == "qrcode": if self.login_type == "qrcode":
await self.login_by_qrcode() await self.login_by_qrcode()
elif self.login_type == "phone": elif self.login_type == "phone":
@ -44,7 +44,7 @@ class WeiboLogin(AbstractLogin):
"[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...") "[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False)) @retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
async def check_login_state(self) -> bool: async def check_login_state(self, no_logged_in_session: str) -> bool:
""" """
Check if the current login status is successful and return True otherwise return False Check if the current login status is successful and return True otherwise return False
retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second
@ -52,22 +52,45 @@ class WeiboLogin(AbstractLogin):
""" """
current_cookie = await self.browser_context.cookies() current_cookie = await self.browser_context.cookies()
_, cookie_dict = utils.convert_cookies(current_cookie) _, cookie_dict = utils.convert_cookies(current_cookie)
if cookie_dict.get("SESSDATA", "") or cookie_dict.get("DedeUserID"): current_web_session = cookie_dict.get("WBPSESS")
if current_web_session != no_logged_in_session:
return True return True
return False return False
async def popup_login_dialog(self):
    """Make sure the Weibo login dialog is open, clicking the login link if needed.

    Waits up to 10 seconds for the modal container
    (``//div[@class='woo-modal-main']``) to appear on ``self.context_page``.
    If that wait raises, the failure is logged, the page is scrolled with the
    mouse wheel, and the "登录" link is clicked to open the dialog by hand.
    A failure of that click is only logged; it is never re-raised.
    """
    modal_xpath = "xpath=//div[@class='woo-modal-main']"
    try:
        # Give the page 10 seconds to pop the login modal up automatically.
        await self.context_page.wait_for_selector(modal_xpath, timeout=10 * 1000)
    except Exception as wait_err:
        # NOTE(review): the fallback below is assumed to run only when the
        # wait above failed -- confirm against the original file's indentation.
        utils.logger.error(
            f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {wait_err}")
        utils.logger.info(
            "[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, we will manually click the login button")

        # Scroll down by 500 wheel units, then pause 2 seconds before
        # looking for the login link.
        await self.context_page.mouse.wheel(0, 500)
        await asyncio.sleep(2)

        try:
            # Open the dialog manually through the "登录" (login) link.
            await self.context_page.locator("xpath=//a[text()='登录']").click()
            await asyncio.sleep(0.5)
        except Exception as click_err:
            # Best effort: the click may fail because the dialog showed up
            # in the meantime, so this is logged at info level only.
            utils.logger.info(f"[WeiboLogin.popup_login_dialog] manually click the login button faield maybe login dialog Appear{click_err}")
async def login_by_qrcode(self): async def login_by_qrcode(self):
"""login weibo website and keep webdriver login state""" """login weibo website and keep webdriver login state"""
utils.logger.info("[WeiboLogin.login_by_qrcode] Begin login weibo by qrcode ...") utils.logger.info("[WeiboLogin.login_by_qrcode] Begin login weibo by qrcode ...")
# click login button await self.popup_login_dialog()
login_button_ele = self.context_page.locator(
"xpath=//div[@class='right-entry__outside go-login-btn']//div"
)
await login_button_ele.click()
# find login qrcode # find login qrcode
qrcode_img_selector = "//div[@class='login-scan-box']//img" qrcode_img_selector = "//div[@class='woo-modal-main']//img"
base64_qrcode_img = await utils.find_login_qrcode( base64_qrcode_img = await utils.find_login_qrcode(
self.context_page, self.context_page,
selector=qrcode_img_selector selector=qrcode_img_selector
@ -81,8 +104,14 @@ class WeiboLogin(AbstractLogin):
asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode) asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)
utils.logger.info(f"[WeiboLogin.login_by_qrcode] Waiting for scan code login, remaining time is 20s") utils.logger.info(f"[WeiboLogin.login_by_qrcode] Waiting for scan code login, remaining time is 20s")
# get not logged session
current_cookie = await self.browser_context.cookies()
_, cookie_dict = utils.convert_cookies(current_cookie)
no_logged_in_session = cookie_dict.get("WBPSESS")
try: try:
await self.check_login_state() await self.check_login_state(no_logged_in_session)
except RetryError: except RetryError:
utils.logger.info("[WeiboLogin.login_by_qrcode] Login weibo failed by qrcode login method ...") utils.logger.info("[WeiboLogin.login_by_qrcode] Login weibo failed by qrcode login method ...")
sys.exit() sys.exit()

View File

@ -9,9 +9,12 @@ import re
from io import BytesIO from io import BytesIO
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
import httpx
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from playwright.async_api import Cookie, Page from playwright.async_api import Cookie, Page
from . import utils
async def find_login_qrcode(page: Page, selector: str) -> str: async def find_login_qrcode(page: Page, selector: str) -> str:
"""find login qrcode image from target selector""" """find login qrcode image from target selector"""
@ -19,8 +22,17 @@ async def find_login_qrcode(page: Page, selector: str) -> str:
elements = await page.wait_for_selector( elements = await page.wait_for_selector(
selector=selector, selector=selector,
) )
login_qrcode_img = await elements.get_property("src") # type: ignore login_qrcode_img = str(await elements.get_property("src")) # type: ignore
return str(login_qrcode_img) if "http://" in login_qrcode_img or "https://" in login_qrcode_img:
async with httpx.AsyncClient(follow_redirects=True) as client:
utils.logger.info(f"[find_login_qrcode] get qrcode by url:{login_qrcode_img}")
resp = await client.get(login_qrcode_img, headers={"User-Agent": get_user_agent()})
if resp.status_code == 200:
image_data = resp.content
base64_image = base64.b64encode(image_data).decode('utf-8')
return base64_image
raise Exception(f"fetch login image url failed, response message:{resp.text}")
return login_qrcode_img
except Exception as e: except Exception as e:
print(e) print(e)
@ -29,7 +41,8 @@ async def find_login_qrcode(page: Page, selector: str) -> str:
def show_qrcode(qr_code) -> None: # type: ignore def show_qrcode(qr_code) -> None: # type: ignore
"""parse base64 encode qrcode image and show it""" """parse base64 encode qrcode image and show it"""
qr_code = qr_code.split(",")[1] if "," in qr_code:
qr_code = qr_code.split(",")[1]
qr_code = base64.b64decode(qr_code) qr_code = base64.b64decode(qr_code)
image = Image.open(BytesIO(qr_code)) image = Image.open(BytesIO(qr_code))