diff --git a/.gitignore b/.gitignore index 41502ef..cedc91d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,5 @@ src/components.d.ts .eslintcache **/test_data.ts -**/TestPanel.vue \ No newline at end of file +**/TestPanel.vue +**/test_notebook.ipynb \ No newline at end of file diff --git a/shim.d.ts b/shim.d.ts index 5019d15..c7dabe1 100644 --- a/shim.d.ts +++ b/shim.d.ts @@ -4,5 +4,6 @@ declare module 'webext-bridge' { export interface ProtocolMap { // define message protocol types // see https://github.com/antfu/webext-bridge#type-safe-protocols + 'tab-update': { tabId: number }; } } diff --git a/src/logic/page-worker/index.ts b/src/logic/page-worker/index.ts index 692a086..1f2cfed 100644 --- a/src/logic/page-worker/index.ts +++ b/src/logic/page-worker/index.ts @@ -3,6 +3,10 @@ import type { AmazonGoodsLinkItem, AmazonPageWorker, AmazonPageWorkerEvents } fr import Browser from 'webextension-polyfill'; import { exec } from '../execute-script'; +/** + * AmazonPageWorkerImpl can run on background & sidepanel & popup, + * **can't** run on content script! 
+ */ class AmazonPageWorkerImpl implements AmazonPageWorker { private static _instance: AmazonPageWorker | null = null; public static getInstance() { @@ -15,6 +19,13 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { readonly channel = new Emittery(); + private async getCurrentTab(): Promise { + const tab = await browser.tabs + .query({ active: true, currentWindow: true }) + .then((tabs) => tabs[0]); + return tab; + } + public async doSearch(keywords: string): Promise { const url = new URL('https://www.amazon.com/s'); url.searchParams.append('k', keywords); @@ -37,7 +48,7 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random()))); while (!document.querySelector('.s-pagination-strip')) { window.scrollBy(0, ~~(Math.random() * 500) + 500); - await new Promise((resolve) => setTimeout(resolve, 10)); + await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 50) + 50)); } }); // #endregion @@ -124,22 +135,112 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { } public async wanderSearchList(): Promise { - const tab = await browser.tabs - .query({ active: true, currentWindow: true }) - .then((tabs) => tabs[0]); + const tab = await this.getCurrentTab(); let stopSignal = false; + const stop = async (_: unknown): Promise => { + stopSignal = true; + }; + this.channel.on('error', stop); let result = { hasNextPage: true, data: [] as AmazonGoodsLinkItem[] }; while (result.hasNextPage && !stopSignal) { result = await this.wanderSearchSinglePage(tab); this.channel.emit('item-links-collected', { objs: result.data }); - this.channel.on('error', () => { - stopSignal = true; - }); } + this.channel.off('error', stop); return new Promise((resolve) => setTimeout(resolve, 1000)); } - public async wanderDetailPage(): Promise {} + public async wanderDetailPage(asin: string): Promise { + const tab = await this.getCurrentTab(); + if (!tab.url?.includes(`/dp/${asin}`)) { + await 
browser.tabs.update(tab.id!, { + url: `https://www.amazon.com/dp/${asin}?th=1`, + }); + } + //#region Await Production Introduction Element Loaded and Determine Page Pattern + const pattern = await exec(tab.id!, async () => { + let targetNode = document.querySelector('#prodDetails, #detailBulletsWrapper_feature_div'); + while (!targetNode) { + window.scrollBy(0, ~~(Math.random() * 500) + 500); + await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 50) + 50)); + targetNode = document.querySelector('#prodDetails, #detailBulletsWrapper_feature_div'); + } + return targetNode.getAttribute('id') === 'prodDetails' ? 'pattern-1' : 'pattern-2'; + }); + //#endregion + //#region Fetch Rating Info + const ratingInfo = await exec(tab.id!, async () => { + const review = document.querySelector('#averageCustomerReviews'); + const rating = Number( + review?.querySelector('#acrPopover')?.getAttribute('title')?.split(' ')[0], + ); + const ratingCount = Number( + review + ?.querySelector('#acrCustomerReviewText') + ?.getAttribute('aria-label') + ?.split(' ')[0] + ?.replace(',', ''), + ); + return { + rating: isNaN(rating) || rating == 0 ? 0 : rating, + ratingCount: isNaN(ratingCount) || ratingCount == 0 ? 
0 : ratingCount, + }; + }); + if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) { + this.channel.emit('item-rating-collected', { + asin, + ...ratingInfo, + }); + } + //#endregion + //#region Fetch Category Rank Info + let rawRankingText: string | null = null; + switch (pattern) { + case 'pattern-1': + rawRankingText = await exec(tab.id!, async () => { + const xpathExp = `//div[@id='prodDetails']//table/tbody/tr[th[1][contains(text(), 'Best Sellers Rank')]]/td`; + const targetNode = document.evaluate( + xpathExp, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ).singleNodeValue as HTMLDivElement | null; + return targetNode?.innerText || null; + }); + break; + case 'pattern-2': + rawRankingText = await exec(tab.id!, async () => { + const xpathExp = `//div[@id='detailBulletsWrapper_feature_div']//ul[.//li[contains(., 'Best Sellers Rank')]]//span[@class='a-list-item']`; + const targetNode = document.evaluate( + xpathExp, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ).singleNodeValue as HTMLDivElement | null; + return targetNode?.innerText || null; + }); + break; + } + if (rawRankingText) { + const [category1Statement, category2Statement] = rawRankingText.split('\n'); + const category1Ranking = Number(/(?<=#)\d+/.exec(category1Statement)?.[0]) || null; + const category1Name = /(?<=in\s).+(?=\s\(See)/.exec(category1Statement)?.[0] || null; + const category2Ranking = Number(/(?<=#)\d+/.exec(category2Statement)?.[0]) || null; + const category2Name = /(?<=in\s).+/.exec(category2Statement)?.[0] || null; + this.channel.emit('item-category-rank-collected', { + asin, + category1: ![category1Name, category1Ranking].includes(null) + ? { name: category1Name!, rank: category1Ranking! } + : undefined, + category2: ![category2Name, category2Ranking].includes(null) + ? { name: category2Name!, rank: category2Ranking! 
} + : undefined, + }); + } + //#endregion + } } class PageWorkerFactory { diff --git a/src/logic/page-worker/types.d.ts b/src/logic/page-worker/types.d.ts index 50c0028..010fc9b 100644 --- a/src/logic/page-worker/types.d.ts +++ b/src/logic/page-worker/types.d.ts @@ -4,10 +4,28 @@ type AmazonGoodsLinkItem = { link: string; title: string }; interface AmazonPageWorkerEvents { /** - * This event is used to collect links to items on the Amazon search page. + * The event is fired when worker collected links to items on the Amazon search page. */ ['item-links-collected']: { objs: AmazonGoodsLinkItem[] }; + /** + * The event is fired when worker collected goods' rating on the Amazon detail page. + */ + ['item-rating-collected']: { + asin: string; + rating: number; + ratingCount: number; + }; + + /** + * The event is fired when worker collected goods' category rank on the Amazon detail page. + */ + ['item-category-rank-collected']: { + asin: string; + category1?: { name: string; rank: number }; + category2?: { name: string; rank: number }; + }; + /** * Error event that occurs when there is an issue with the Amazon page worker. */ @@ -35,6 +53,7 @@ interface AmazonPageWorker { /** * Browsing goods detail page and collect target information. + * @param asin Product identifier (ASIN) */ - wanderDetailPage(): Promise; + wanderDetailPage(asin: string): Promise; } diff --git a/src/sidepanel/App.vue b/src/sidepanel/App.vue index 311d10d..652a479 100644 --- a/src/sidepanel/App.vue +++ b/src/sidepanel/App.vue @@ -1,11 +1,14 @@