From 1dc76ffc20f8f5a4e4fda21672e92807cef22311 Mon Sep 17 00:00:00 2001 From: johnathan <952508490@qq.com> Date: Thu, 17 Apr 2025 16:43:13 +0800 Subject: [PATCH] feature: split 2 patterns of search page & declare wanderDetailPage function --- .gitignore | 3 + .prettierrc | 3 +- .vscode/extensions.json | 8 +- .vscode/launch.json | 16 ++++ .vscode/settings.json | 5 +- src/logic/execute-script.ts | 17 ++++ src/logic/page-worker/index.ts | 159 +++++++++++++++++++++---------- src/logic/page-worker/types.d.ts | 23 +++-- src/sidepanel/App.vue | 11 +++ src/sidepanel/Sidepanel.vue | 89 ++++++++++++++--- src/sidepanel/main.ts | 2 +- vite.config.mts | 13 +-- 12 files changed, 256 insertions(+), 93 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 src/logic/execute-script.ts create mode 100644 src/sidepanel/App.vue diff --git a/.gitignore b/.gitignore index 453b76d..41502ef 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ node_modules src/auto-imports.d.ts src/components.d.ts .eslintcache + +**/test_data.ts +**/TestPanel.vue \ No newline at end of file diff --git a/.prettierrc b/.prettierrc index 544138b..554f2a3 100644 --- a/.prettierrc +++ b/.prettierrc @@ -1,3 +1,4 @@ { - "singleQuote": true + "singleQuote": true, + "printWidth": 100 } diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 96f65d5..b338073 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -1,9 +1,3 @@ { - "recommendations": [ - "vue.volar" - // "antfu.iconify", - // "antfu.unocss", - // "dbaeumer.vscode-eslint", - // "csstools.postcss" - ] + "recommendations": ["vue.volar"] } diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..9e22072 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "msedge", + "request": "attach", + "name": "Attach to side panel", + "webRoot": "${workspaceFolder}/src/", + "port": 9222, + "urlFilter": "chrome-extension://*" + } + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index fb068a7..4463aef 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,12 +1,11 @@ { - "cSpell.words": ["Vitesse"], "typescript.tsdk": "node_modules/typescript/lib", - "vite.autoStart": false, "editor.codeActionsOnSave": { "source.fixAll.eslint": "explicit" }, "files.associations": { "*.css": "postcss" }, - "prettier.tabWidth": 2 + "prettier.tabWidth": 2, + "prettier.printWidth": 100 } diff --git a/src/logic/execute-script.ts b/src/logic/execute-script.ts new file mode 100644 index 0000000..b32620b --- /dev/null +++ b/src/logic/execute-script.ts @@ -0,0 +1,17 @@ +/** + * Runs `func` in the given tab via `browser.scripting.executeScript` and returns the result of the last injection. An injection error is logged with `console.error`, not thrown. + * @param tabId - ID of the tab used as the injection target. + * @param func - Function handed to `executeScript` as the script to inject. + * @returns The `result` of the last injection result, or `undefined` when no injection result exists. + */ +export async function executeScript(tabId: number, func: () => Promise): Promise { + const injectResults = await browser.scripting.executeScript({ + target: { tabId }, + func, + }); + const ret = injectResults.pop(); + if (ret?.error) { + console.error('注入脚本时发生错误', ret.error); + } + return ret?.result as T | null; +} diff --git a/src/logic/page-worker/index.ts b/src/logic/page-worker/index.ts index d72172b..eac5949 100644 --- a/src/logic/page-worker/index.ts +++ b/src/logic/page-worker/index.ts @@ -1,5 +1,7 @@ import Emittery from 'emittery'; -import { AmazonPageWorker, AmazonPageWorkerEvents } from './types'; +import type { AmazonGoodsLinkItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types'; +import Browser from 'webextension-polyfill'; +import { executeScript } from '../execute-script'; class AmazonPageWorkerImpl implements AmazonPageWorker { readonly channel = new Emittery(); @@ -12,70 +14,123 @@ class AmazonPageWorkerImpl implements AmazonPageWorker 
{ .query({ active: true, currentWindow: true }) .then((tabs) => tabs[0]); const currentUrl = new URL(tab.url!); - if ( - currentUrl.hostname !== url.hostname || - currentUrl.searchParams.get('k') !== keywords - ) { + if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) { await browser.tabs.update(tab.id, { url: url.toString() }); + await new Promise((resolve) => setTimeout(resolve, 1000)); } return url.toString(); } - private async wanderSearchSinglePage() { - const tab = await browser.tabs - .query({ active: true, currentWindow: true }) - .then((tabs) => tabs[0]); - const results = await browser.scripting.executeScript({ - target: { tabId: tab.id! }, - func: async () => { - try { - await new Promise((resolve) => - setTimeout(resolve, 500 + ~~(500 * Math.random())), - ); - while (!document.querySelector('.s-pagination-strip')) { - window.scrollBy(0, ~~(Math.random() * 500) + 500); - await new Promise((resolve) => setTimeout(resolve, 10)); - } - const items = document.querySelectorAll( - '.a-section.a-spacing-small.puis-padding-left-small', - ); - const links: string[] = []; - items.forEach((el) => { - const link = - el.querySelector('a.a-link-normal')?.href; - link && links.push(link); - }); - const nextButton = - document.querySelector('.s-pagination-next'); - if ( - nextButton && - !nextButton.classList.contains('s-pagination-disabled') - ) { - await new Promise((resolve) => - setTimeout(resolve, 500 + ~~(500 * Math.random())), - ); - nextButton.click(); - } else { - return null; - } - return links; - } catch (e) { - return null; - } - }, + private async wanderSearchSinglePage(tab: Browser.Tabs.Tab) { + const tabId = tab.id!; + // #region Wait for the Next button to appear, indicating that the product items have finished loading + await executeScript(tabId, async () => { + await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random()))); + while (!document.querySelector('.s-pagination-strip')) { + 
window.scrollBy(0, ~~(Math.random() * 500) + 500); + await new Promise((resolve) => setTimeout(resolve, 10)); + } }); + // #endregion + // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1 + const pagePattern = await executeScript(tabId, async () => { + return [ + ...(document.querySelectorAll( + '.a-section.a-spacing-small.puis-padding-left-small', + ) as unknown as HTMLDivElement[]), + ].filter((e) => e.getClientRects().length > 0).length === 0 + ? 'pattern-1' + : 'pattern-2'; + }); + if (typeof pagePattern !== 'string') { + this.channel.emit('error', { message: '无法判断商品搜索页类型', url: tab.url }); + throw new Error('无法判断商品搜索页类型'); + } + // #endregion + // #region Retrieve key nodes and their information from the critical product search page + let data: AmazonGoodsLinkItem[] | null = null; + switch (pagePattern) { + // Handle the case where goods are displayed as a list + case 'pattern-1': + data = await executeScript(tabId, async () => { + const items = [ + ...(document.querySelectorAll( + '.a-section.a-spacing-small.a-spacing-top-small:not(.a-text-right)', + ) as unknown as HTMLDivElement[]), + ].filter((e) => e.getClientRects().length > 0); + const linkObjs = items.reduce((objs, el) => { + const link = el.querySelector('a')?.href; + const title = el + .querySelector('h2.a-color-base') + ?.getAttribute('aria-label'); + link && objs.push({ link, title: title || '' }); + return objs; + }, []); + return linkObjs; + }); + break; + // Handle the case where goods are displayed as a two-dimensional image grid + case 'pattern-2': + data = await executeScript(tabId, async () => { + const items = [ + ...(document.querySelectorAll( + '.a-section.a-spacing-small.puis-padding-left-small', + ) as unknown as HTMLDivElement[]), + ].filter((e) => e.getClientRects().length > 0); + const linkObjs = items.reduce((objs, el) => { + const link = el.querySelector('a.a-link-normal')?.href; + const title = el.querySelector('h2.a-color-base')?.innerText; + link && objs.push({ link, title: title || '' }); + return objs; + }, []); 
+ return linkObjs; + }); + break; + default: + break; + } + // #endregion + // #region Determine if it is the last page, otherwise navigate to the next page + const hasNextPage = await executeScript(tabId, async () => { + const nextButton = document.querySelector('.s-pagination-next'); + if (nextButton) { + if (!nextButton.classList.contains('s-pagination-disabled')) { + await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random()))); + nextButton.click(); + return true; + } else { + return false; + } + } else { + throw new Error('Error: next page button not found'); + } + }); + // #endregion await new Promise((resolve) => setTimeout(resolve, 1000)); - return results.pop()?.result as string[] | null; + if (data === null || typeof hasNextPage !== 'boolean') { + this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url }); + throw new Error('爬取单页信息失败'); + } + return { data, hasNextPage }; } public async wanderSearchList(): Promise { - let links = await this.wanderSearchSinglePage(); - while (links) { - this.channel.emit('item-links-collected', { links }); - links = await this.wanderSearchSinglePage(); + const tab = await browser.tabs + .query({ active: true, currentWindow: true }) + .then((tabs) => tabs[0]); + let stopSignal = false; + let result = { hasNextPage: true, data: [] as AmazonGoodsLinkItem[] }; + while (result.hasNextPage && !stopSignal) { + result = await this.wanderSearchSinglePage(tab); + this.channel.emit('item-links-collected', { objs: result.data }); + this.channel.on('error', () => { + stopSignal = true; + }); } return new Promise((resolve) => setTimeout(resolve, 1000)); } + + public async wanderDetailPage(): Promise {} } class PageWorkerFactory { diff --git a/src/logic/page-worker/types.d.ts b/src/logic/page-worker/types.d.ts index a51f085..50c0028 100644 --- a/src/logic/page-worker/types.d.ts +++ b/src/logic/page-worker/types.d.ts @@ -1,13 +1,18 @@ import type Emittery from 'emittery'; +type AmazonGoodsLinkItem = { link: 
string; title: string }; + interface AmazonPageWorkerEvents { /** - * Emitted when a new item is found on the Amazon page. - * @param link - The item link that was found. + * Emitted with the goods links and titles collected from an Amazon search page. */ - ['item-links-collected']: { links: string[] }; -} + ['item-links-collected']: { objs: AmazonGoodsLinkItem[] }; + /** + * Emitted when the Amazon page worker runs into a problem; carries a message and, optionally, the page URL. + */ + ['error']: { message: string; url?: string }; +} interface AmazonPageWorker { /** @@ -17,15 +22,19 @@ interface AmazonPageWorker { readonly channel: Emittery; /** - * Search for a list of items on Amazon + * Search for a list of goods on Amazon. * @param keywords - The keywords to search for on Amazon. * @returns A promise that resolves to a string representing the search URL. */ doSearch(keywords: string): Promise; /** - * Browsing item search page and collect links to those items. - * @param entryUrl - The URL of the Amazon search page to start from. + * Browse the goods search page and collect links to those goods. */ wanderSearchList(): Promise; + + /** + * Browse the goods detail page and collect the target information. + */ + wanderDetailPage(): Promise; } diff --git a/src/sidepanel/App.vue b/src/sidepanel/App.vue new file mode 100644 index 0000000..311d10d --- /dev/null +++ b/src/sidepanel/App.vue @@ -0,0 +1,11 @@ + + + diff --git a/src/sidepanel/Sidepanel.vue b/src/sidepanel/Sidepanel.vue index e1dcb38..4cc3b87 100644 --- a/src/sidepanel/Sidepanel.vue +++ b/src/sidepanel/Sidepanel.vue @@ -1,28 +1,85 @@ @@ -58,13 +120,16 @@ const onSearch = async () => {