diff --git a/src/components/ProgressReport.vue b/src/components/ProgressReport.vue index 1adc7d5..a0295b1 100644 --- a/src/components/ProgressReport.vue +++ b/src/components/ProgressReport.vue @@ -13,7 +13,7 @@ defineProps<{ & { hidden?: boolean })[] = [ key: 'keywords', minWidth: 120, }, + { + title: '页码', + key: 'page', + minWidth: 60, + }, { title: '排位', key: 'rank', diff --git a/src/logic/error-handler.ts b/src/logic/error-handler.ts index 7cae595..87bdb41 100644 --- a/src/logic/error-handler.ts +++ b/src/logic/error-handler.ts @@ -22,6 +22,7 @@ export function withErrorHandling( throw error; } }; + Object.defineProperty(decoratedMethod, 'name', { value: originalMethod.name }); // 返回装饰后的方法 return decoratedMethod; } diff --git a/src/logic/execute-script.ts b/src/logic/execute-script.ts index df54932..6482738 100644 --- a/src/logic/execute-script.ts +++ b/src/logic/execute-script.ts @@ -21,26 +21,31 @@ * console.log(result); // Outputs: 42 * ``` */ -export async function exec(tabId: number, func: () => Promise): Promise; +export async function exec(tabId: number, func: () => Promise): Promise; export async function exec>( tabId: number, func: (payload: P) => Promise, payload: P, -): Promise; +): Promise; export async function exec>( tabId: number, func: (payload?: P) => Promise, payload?: P, -): Promise { - const injectResults = await browser.scripting.executeScript({ - target: { tabId }, - func, - args: payload ? [payload] : undefined, +): Promise { + const { timeout } = { + timeout: 30000, + }; + return new Promise(async (resolve, reject) => { + setTimeout(() => reject('脚本运行超时'), timeout); + const injectResults = await browser.scripting.executeScript({ + target: { tabId }, + func, + args: payload ? [payload] : undefined, + }); + const ret = injectResults.pop(); + if (ret?.error) { + reject(`注入脚本时发生错误: ${ret.error}`); + } + resolve(ret!.result as T); }); - const ret = injectResults.pop(); - if (ret?.error) { - console.error('注入脚本时发生错误', ret.error); - throw new Error('注入脚本时发生错误'); - } - return ret?.result as T | null; } diff --git a/src/logic/page-worker/index.ts b/src/logic/page-worker/index.ts index fdfc7e7..23fcc7b 100644 --- a/src/logic/page-worker/index.ts +++ b/src/logic/page-worker/index.ts @@ -2,14 +2,13 @@ import Emittery from 'emittery'; import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types'; import type { Tabs } from 'webextension-polyfill'; import { exec } from '../execute-script'; -import { TaskController, TaskQueue, taskUnit } from '../task-queue'; import { withErrorHandling } from '../error-handler'; /** * AmazonPageWorkerImpl can run on background & sidepanel & popup, * **can't** run on content script! */ -class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { +class AmazonPageWorkerImpl implements AmazonPageWorker { //#region Singleton private static _instance: AmazonPageWorker | null = null; public static getInstance() { @@ -18,18 +17,12 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { } return this._instance; } - private constructor() {} //#endregion - /** - * The channel for communication with the Amazon page worker. - */ - readonly channel = new Emittery(); + private constructor() {} - /** - * The Task queue - */ - readonly taskQueue = new TaskQueue(); + private _controlChannel = new Emittery<{ interrupt: undefined }>(); + public readonly channel = new Emittery(); private async getCurrentTab(): Promise { const tab = await browser.tabs @@ -52,11 +45,20 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { await exec(tabId, async () => { await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random()))); while (true) { - const target = document.querySelector('.s-pagination-strip'); + const targetNode = document.querySelector('.s-pagination-next'); window.scrollBy(0, ~~(Math.random() * 500) + 500); await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 50) + 500)); - if (target || document.readyState === 'complete') { - target?.scrollIntoView({ behavior: 'smooth', block: 'center' }); + if (targetNode || document.readyState === 'complete') { + targetNode?.scrollIntoView({ behavior: 'smooth', block: 'center' }); + break; + } + } + while (true) { + await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random()))); + const spins = Array.from(document.querySelectorAll('.a-spinner')).filter( + (e) => e.getClientRects().length > 0, + ); + if (spins.length === 0) { break; } } @@ -128,6 +130,14 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { break; } // #endregion + // #region get current page + const page = (await exec(tab.id!, async () => { + const node = document.querySelector( + '.s-pagination-item.s-pagination-selected', + ); + return node ? Number(node.innerText) : 1; + }))!; + // #endregion // #region Determine if it is the last page, otherwise navigate to the next page const hasNextPage = await exec(tabId, async () => { const nextButton = document.querySelector('.s-pagination-next'); @@ -149,18 +159,17 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url }); throw new Error('爬取单页信息失败'); } - return { data, hasNextPage }; + return { data, hasNextPage, page }; } @withErrorHandling - @taskUnit public async doSearch(keywords: string): Promise { const url = new URL('https://www.amazon.com/s'); url.searchParams.append('k', keywords); - - const tab = await browser.tabs - .query({ active: true, currentWindow: true }) - .then((tabs) => tabs[0]); + let tab = await this.getCurrentTab(); + if (!tab.url?.startsWith('http')) { + tab = await this.createNewTab('https://www.amazon.com/'); + } const currentUrl = new URL(tab.url!); if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) { await browser.tabs.update(tab.id, { url: url.toString() }); @@ -170,16 +179,16 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { } @withErrorHandling - @taskUnit public async wanderSearchPage(): Promise { - const tab = await this.getCurrentTab(); + let tab = await this.getCurrentTab(); let offset = 0; while (true) { - const { hasNextPage, data } = await this.wanderSearchSinglePage(tab); + const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab); const keywords = new URL(tab.url!).searchParams.get('k')!; const objs = data.map((r, i) => ({ ...r, keywords, + page, rank: offset + 1 + i, createTime: new Date().toLocaleString(), asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0], @@ -190,13 +199,11 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { break; } } - this.channel.off('error', stop); return new Promise((resolve) => setTimeout(resolve, 1000)); } @withErrorHandling - @taskUnit - public async wanderDetailPage(entry: string): Promise { + public async wanderDetailPage(entry: string) { //#region Initial Meta Info const params = { asin: '', url: '' }; if (entry.match(/^https?:\/\/www\.amazon\.com.*\/dp\/[A-Z0-9]{10}/)) { @@ -220,15 +227,22 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { await exec(tab.id!, async () => { while (true) { window.scrollBy(0, ~~(Math.random() * 500) + 500); - await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 50) + 50)); + await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 100) + 200)); const targetNode = document.querySelector( '#prodDetails:has(td), #detailBulletsWrapper_feature_div:has(li), .av-page-desktop', ); + const exceptionalNodeSelectors = ['music-detail-header', '.avu-retail-page']; + for (const selector of exceptionalNodeSelectors) { + if (document.querySelector(selector)) { + return false; + } + } if (targetNode && document.readyState !== 'loading') { targetNode.scrollIntoView({ behavior: 'smooth', block: 'center' }); - return targetNode.getAttribute('id') === 'prodDetails' ? 'pattern-1' : 'pattern-2'; + break; } } + return true; }); await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds. //#endregion @@ -306,9 +320,9 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { //#endregion //#region Fetch Goods' Images const imageUrls = await exec(tab.id!, async () => { - let urls = [ - ...(document.querySelectorAll('.imageThumbnail img') as unknown as HTMLImageElement[]), - ].map((e) => e.src); + let urls = Array.from(document.querySelectorAll('.imageThumbnail img')).map( + (e) => e.src, + ); //#region process more images https://github.com/primedigitaltech/azon_seeker/issues/4 const overlay = document.querySelector('.overlayRestOfImages'); if (overlay) { @@ -344,17 +358,52 @@ class AmazonPageWorkerImpl implements AmazonPageWorker, TaskController { //#endregion return urls; }); - imageUrls && + imageUrls.length > 0 && this.channel.emit('item-images-collected', { asin: params.asin, - imageUrls, + imageUrls: Array.from(new Set(imageUrls)), }); - //#endregion await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds. + //#endregion + } + + public async runSearchPageTask( + keywordsList: string[], + progress?: (remains: string[]) => Promise, + ): Promise { + let remains = [...keywordsList]; + let interrupt = false; + const unsubscribe = this._controlChannel.on('interrupt', () => { + interrupt = true; + }); + while (remains.length > 0 && !interrupt) { + const kw = remains.shift()!; + await this.doSearch(kw); + await this.wanderSearchPage(); + progress && progress(remains); + } + unsubscribe(); + } + + public async runDetaiPageTask( + asins: string[], + progress?: (remains: string[]) => Promise, + ): Promise { + let remains = [...asins]; + let interrupt = false; + const unsubscribe = this._controlChannel.on('interrupt', () => { + interrupt = true; + }); + while (remains.length > 0 && !interrupt) { + const asin = remains.shift()!; + await this.wanderDetailPage(asin); + progress && progress(remains); + } + unsubscribe(); } public async stop(): Promise { - this.taskQueue.clear(); + this._controlChannel.emit('interrupt'); } } diff --git a/src/logic/page-worker/types.d.ts b/src/logic/page-worker/types.d.ts index 0f5567f..dd1d5cd 100644 --- a/src/logic/page-worker/types.d.ts +++ b/src/logic/page-worker/types.d.ts @@ -3,6 +3,7 @@ import { TaskQueue } from '../task-queue'; type AmazonSearchItem = { keywords: string; + page: number; link: string; title: string; asin: string; @@ -56,23 +57,22 @@ interface AmazonPageWorker { */ readonly channel: Emittery; - /** - * Search for a list of goods on Amazon - * @param keywords - The keywords to search for on Amazon. - * @returns A promise that resolves to a string representing the search URL. - */ - doSearch(keywords: string): Promise; - /** * Browsing goods search page and collect links to those goods. + * @param keywordsList - The keywords list to search for on Amazon. + * @param progress The callback that receive remaining keywords as the parameter. */ - wanderSearchPage(): Promise; + runSearchPageTask( + keywordsList: string[], + progress?: (remains: string[]) => Promise, + ): Promise; /** * Browsing goods detail page and collect target information. - * @param entry Product link or Amazon Standard Identification Number. + * @param asins Amazon Standard Identification Numbers. + * @param progress The callback that receive remaining asins as the parameter. */ - wanderDetailPage(entry: string | string[]): Promise; + runDetaiPageTask(asins: string[], progress?: (remains: string[]) => Promise): Promise; /** * Stop the worker. diff --git a/src/logic/task-queue.ts b/src/logic/task-queue.ts index 1b6468f..775cf6e 100644 --- a/src/logic/task-queue.ts +++ b/src/logic/task-queue.ts @@ -12,22 +12,16 @@ export type TaskExecutionResult = message: string; }; -export interface TaskInit< - T = undefined, - F extends (...args: unknown[]) => Promise = (...args: unknown[]) => Promise, -> { - func: F; - args?: Parameters; +export interface TaskInit { + func: (...args: P) => Promise; + args?: P; callback?: (result: TaskExecutionResult) => Promise | void; } -export class Task< - T = undefined, - F extends (...args: unknown[]) => Promise = (...args: unknown[]) => Promise, -> { +export class Task { private _name: string; - private _func: F; - private _args: Parameters; + private _func: (...args: P) => Promise; + private _args: P; private _status: 'initialization' | 'running' | 'success' | 'failure' = 'initialization'; private _result: TaskExecutionResult | null = null; private _callback: ((result: TaskExecutionResult) => Promise | void) | undefined; @@ -44,10 +38,10 @@ export class Task< return this._result; } - constructor(name: string, init: TaskInit) { + constructor(name: string, init: TaskInit) { this._name = name; this._func = init.func; - this._args = init.args ?? ([] as unknown as Parameters); + this._args = init.args ?? ([] as unknown as P); this._callback = init.callback; } @@ -81,7 +75,7 @@ export class Task< } export class TaskQueue { - private _queue: Task[] = []; + private _queue: Task[] = []; private _running = false; private _channel: Emittery<{ interrupt: undefined; start: undefined; stop: undefined }> = new Emittery(); @@ -99,7 +93,11 @@ export class TaskQueue { return this._running; } - public add(task: Task) { + public get channel() { + return this._channel; + } + + public add(task: Task) { this._queue.push(task); } @@ -141,41 +139,3 @@ export interface TaskController { */ readonly taskQueue: TaskQueue; } - -/** - * A decorator function that wraps a method to manage its execution as a task in a task queue. - * - * This function takes a method and returns a new method that, when called, will create a - * `Task` and add it to the `taskQueue` of the `TaskController`. The original method will be - * executed asynchronously, and the result will be resolved or rejected based on the task's - * outcome. - */ -export function taskUnit( - target: (this: TaskController, ...args: any[]) => Promise, - context: ClassMethodDecoratorContext, -): (this: TaskController, ...args: any[]) => Promise { - // target 就是当前被装饰的 class 方法 - const originalMethod = target; - // 定义一个新方法 - const decoratedMethod = async function (this: TaskController, ...args: any[]) { - return new Promise((resolve, reject) => { - const task = new Task(context.name.toString(), { - func: (o, ...a) => originalMethod.call(o, ...a), - args: [this, ...args], - callback: (r) => { - if (r.status === 'success') { - resolve(r.result); - } else if (r.status === 'failure') { - reject(r.message); - } - }, - }); - this.taskQueue.add(task); - if (!this.taskQueue.running) { - this.taskQueue.start(); - } - }); - }; - // 返回装饰后的方法 - return decoratedMethod; -} diff --git a/src/options/index.html b/src/options/index.html index c1bafd5..67d2cd4 100644 --- a/src/options/index.html +++ b/src/options/index.html @@ -3,7 +3,7 @@ - Options + 结果页
diff --git a/src/sidepanel/DetailPageWorker.vue b/src/sidepanel/DetailPageWorker.vue index f73521e..1ecef0b 100644 --- a/src/sidepanel/DetailPageWorker.vue +++ b/src/sidepanel/DetailPageWorker.vue @@ -111,11 +111,9 @@ const handleFetchInfoFromPage = () => { content: '开始数据采集', }, ]; - while (asinList.length > 0) { - const asin = asinList.shift()!; - await worker.wanderDetailPage(asin); - asinInputText.value = asinList.join('\n'); // Update Input Text - } + await worker.runDetaiPageTask(asinList, async (remains) => { + asinInputText.value = remains.join('\n'); + }); timelines.value.push({ type: 'info', title: '结束', @@ -135,6 +133,12 @@ const handleFetchInfoFromPage = () => { }); }; +const handleInterrupt = () => { + if (!running.value) return; + worker.stop(); + message.info('已触发中断,正在等待当前任务完成。', { duration: 2000 }); +}; + const createOrUpdateDetailItem = (info: AmazonDetailItem) => { const targetIndex = detailItems.value.findLastIndex((item) => info.asin === item.asin); if (targetIndex > -1) { @@ -154,17 +158,17 @@ const createOrUpdateDetailItem = (info: AmazonDetailItem) => {

Detail Page

-
+
- + 导入 - + @@ -178,20 +182,27 @@ const createOrUpdateDetailItem = (info: AmazonDetailItem) => { style="padding-top: 0px" > - + 开始 + + + 停止 +
-
+
警告,在插件运行期间请勿与浏览器交互。
@@ -230,14 +241,14 @@ const createOrUpdateDetailItem = (info: AmazonDetailItem) => { } .running-tip-section { - margin: 0 0 10px 0; + margin: 10px 0 0 0; height: 100px; border-radius: 10px; cursor: wait; } .progress-report { - margin-top: 20px; + margin-top: 10px; width: 95%; } } diff --git a/src/sidepanel/SearchPageWorker.vue b/src/sidepanel/SearchPageWorker.vue index 87099db..6c602dd 100644 --- a/src/sidepanel/SearchPageWorker.vue +++ b/src/sidepanel/SearchPageWorker.vue @@ -40,28 +40,45 @@ const timelines = ref< >([]); const handleFetchInfoFromPage = async () => { + if (keywordsList.value.length === 0) { + return; + } + const kws = unref(keywordsList); running.value = true; - timelines.value = []; - for (const keywords of keywordsList.value.filter((k) => k.trim() !== '')) { - timelines.value.push({ + timelines.value = [ + { type: 'info', title: '开始', time: new Date().toLocaleString(), - content: `开始关键词:${keywords} 数据采集`, - }); - //#region start page worker - await worker.doSearch(keywords); - await worker.wanderSearchPage(); - //#endregion - timelines.value.push({ - type: 'info', - title: '结束', - time: new Date().toLocaleString(), - content: `关键词: ${keywords} 数据采集完成`, - }); - } + content: `关键词: ${kws[0]} 数据采集开始`, + }, + ]; + timelines.value.push(); + await worker.runSearchPageTask(kws, async (remains) => { + if (remains.length > 0) { + timelines.value.push({ + type: 'info', + title: '开始', + time: new Date().toLocaleString(), + content: `关键词: ${remains[0]} 数据采集开始`, + }); + keywordsList.value = remains; + } + }); + timelines.value.push({ + type: 'info', + title: '结束', + time: new Date().toLocaleString(), + content: `搜索任务结束`, + }); running.value = false; }; + +const handleInterrupt = () => { + if (!running.value) return; + worker.stop(); + message.info('已触发中断,正在等待当前任务完成。', { duration: 2000 }); +}; diff --git a/src/sidepanel/index.html b/src/sidepanel/index.html index ecc7f1d..3be584b 100644 --- a/src/sidepanel/index.html +++ b/src/sidepanel/index.html @@ -5,7 +5,7 @@ Sidepanel - +
diff --git a/vite.config.mts b/vite.config.mts index 14bfd29..2b6f92c 100644 --- a/vite.config.mts +++ b/vite.config.mts @@ -95,7 +95,9 @@ export default defineConfig(({ command }) => ({ rollupOptions: { input: { sidepanel: r('src/sidepanel/index.html'), + options: r('src/options/index.html'), }, + output: {}, }, }, test: {