From a2402423316c3b557e5b7dbd026307cb9a1d8767 Mon Sep 17 00:00:00 2001 From: johnathan <952508490@qq.com> Date: Thu, 12 Jun 2025 10:06:25 +0800 Subject: [PATCH] Update --- src/composables/useCurrentUrl.ts | 21 +++ src/composables/usePageContext.ts | 4 +- src/logic/page-worker/amazon.ts | 284 ++++++++++++++++++++++++++++ src/logic/page-worker/homedepot.ts | 75 ++++++++ src/logic/page-worker/index.ts | 287 +---------------------------- src/logic/page-worker/types.d.ts | 50 ++++- src/logic/web-injectors.ts | 103 ++++++++--- src/sidepanel/DetailPageEntry.vue | 2 +- src/sidepanel/ReviewPageEntry.vue | 2 +- src/sidepanel/SearchPageEntry.vue | 2 +- 10 files changed, 504 insertions(+), 326 deletions(-) create mode 100644 src/composables/useCurrentUrl.ts create mode 100644 src/logic/page-worker/amazon.ts create mode 100644 src/logic/page-worker/homedepot.ts diff --git a/src/composables/useCurrentUrl.ts b/src/composables/useCurrentUrl.ts new file mode 100644 index 0000000..5c90301 --- /dev/null +++ b/src/composables/useCurrentUrl.ts @@ -0,0 +1,21 @@ +export function useCurrentUrl() { + const currentUrl = ref(''); + + const updateUrl = async () => { + const tab = await browser.tabs.query({ active: true, currentWindow: true }).then((ts) => ts[0]); + currentUrl.value = tab.url || ''; + }; + + onMounted(() => { + updateUrl(); + browser.tabs.onUpdated.addListener(updateUrl); + browser.tabs.onHighlighted.addListener(updateUrl); + }); + + onUnmounted(() => { + browser.tabs.onUpdated.removeListener(updateUrl); + browser.tabs.onHighlighted.removeListener(updateUrl); + }); + + return { currentUrl }; +} diff --git a/src/composables/usePageContext.ts b/src/composables/usePageContext.ts index 5c6cf7e..6202dfc 100644 --- a/src/composables/usePageContext.ts +++ b/src/composables/usePageContext.ts @@ -1,4 +1,4 @@ -export const usePageContext = () => { +export function usePageContext() { const pageContext = document.location.pathname.split('/')[2] as 'sidepanel' | 'options'; return { pageContext }; -}; +} diff --git a/src/logic/page-worker/amazon.ts b/src/logic/page-worker/amazon.ts new file mode 100644 index 0000000..18cd5a3 --- /dev/null +++ b/src/logic/page-worker/amazon.ts @@ -0,0 +1,284 @@ +import Emittery from 'emittery'; +import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types'; +import type { Tabs } from 'webextension-polyfill'; +import { withErrorHandling } from '../error-handler'; +import { + AmazonDetailPageInjector, + AmazonReviewPageInjector, + AmazonSearchPageInjector, +} from '../web-injectors'; +import { isForbiddenUrl } from '~/env'; + +/** + * AmazonPageWorkerImpl can run on background & sidepanel & popup, + * **can't** run on content script! + */ +class AmazonPageWorkerImpl implements AmazonPageWorker { + //#region Singleton + private static _instance: AmazonPageWorker | null = null; + public static getInstance() { + if (this._instance === null) { + this._instance = new AmazonPageWorkerImpl(); + } + return this._instance; + } + //#endregion + + private constructor() {} + + private readonly _controlChannel = new Emittery<{ interrupt: undefined }>(); + public readonly channel = new Emittery(); + + private async getCurrentTab(): Promise { + const tab = await browser.tabs + .query({ active: true, currentWindow: true }) + .then((tabs) => tabs[0]); + return tab; + } + + private async createNewTab(url: string): Promise { + const tab = await browser.tabs.create({ url, active: true }); + return tab; + } + + private async wanderSearchSinglePage(tab: Tabs.Tab) { + const injector = new AmazonSearchPageInjector(tab); + // #region Wait for the Next button to appear, indicating that the product items have finished loading + await injector.waitForPageLoaded(); + // #endregion + // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1 + const pagePattern = await injector.getPagePattern(); + // #endregion + // #region Retrieve key nodes and their information from the critical product search page + const data = await injector.getPageData(pagePattern); + // #endregion + // #region get current page + const page = await injector.getCurrentPage(); + // #endregion + // #region Determine if it is the last page, otherwise navigate to the next page + const hasNextPage = await injector.determineHasNextPage(); + // #endregion + await new Promise((resolve) => setTimeout(resolve, 1000)); + if (data === null || typeof hasNextPage !== 'boolean') { + this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url }); + throw new Error('爬取单页信息失败'); + } + return { data, hasNextPage, page }; + } + + @withErrorHandling + public async doSearch(keywords: string): Promise { + const url = new URL('https://www.amazon.com/s'); + url.searchParams.append('k', keywords); + let tab = await this.getCurrentTab(); + if (!tab.url?.startsWith('http')) { + tab = await this.createNewTab('https://www.amazon.com/'); + tab.url = 'https://www.amazon.com/'; + } + const currentUrl = new URL(tab.url!); + if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) { + tab = await browser.tabs.update(tab.id, { url: url.toString(), active: true }); + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + return url.toString(); + } + + @withErrorHandling + public async wanderSearchPage(): Promise { + const tab = await this.getCurrentTab(); + let offset = 0; + while (true) { + const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab); + const keywords = new URL(tab.url!).searchParams.get('k')!; + const objs = data.map((r, i) => ({ + ...r, + keywords, + page, + rank: offset + 1 + i, + createTime: new Date().toLocaleString(), + asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0], + })); + this.channel.emit('item-links-collected', { objs }); + offset += data.length; + if (!hasNextPage) { + break; + } + } + return new Promise((resolve) => setTimeout(resolve, 1000)); + } + + @withErrorHandling + public async wanderDetailPage(entry: string) { + //#region Initial Meta Info + const params = { asin: '', url: '' }; + if (entry.match(/^https?:\/\/www\.amazon\.com.*\/dp\/[A-Z0-9]{10}/)) { + const [asin] = /\/\/dp\/[A-Z0-9]{10}/.exec(entry)!; + params.asin = asin; + params.url = entry; + } else if (entry.match(/^[A-Z0-9]{10}$/)) { + params.asin = entry; + params.url = `https://www.amazon.com/dp/${entry}`; + } + let tab = await this.getCurrentTab(); + if (!tab.url || isForbiddenUrl(tab.url)) { + tab = await this.createNewTab(params.url); + } else { + tab = await browser.tabs.update(tab.id, { + url: params.url, + }); + } + const injector = new AmazonDetailPageInjector(tab); + //#endregion + //#region Await Production Introduction Element Loaded + await injector.waitForPageLoaded(); + await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds. + //#endregion + //#region Fetch Base Info + const baseInfo = await injector.getBaseInfo(); + this.channel.emit('item-base-info-collected', { + asin: params.asin, + title: baseInfo.title, + price: baseInfo.price, + }); + //#endregion + //#region Fetch Rating Info + const ratingInfo = await injector.getRatingInfo(); + if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) { + this.channel.emit('item-rating-collected', { + asin: params.asin, + ...ratingInfo, + }); + } + //#endregion + //#region Fetch Category Rank Info + let rawRankingText: string | null = await injector.getRankText(); + if (rawRankingText) { + const info: Pick = {}; + let statement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/.exec(rawRankingText)?.[0]; + if (statement) { + const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/\s\(See\sTop.+\)/, ''); + const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', '')); + if (name && !Number.isNaN(rank)) { + info['category1'] = { name, rank }; + } + rawRankingText = rawRankingText.replace(statement, ''); + } + statement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/.exec(rawRankingText)?.[0]; + if (statement) { + const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/[\s]+$/, ''); + const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', '')); + if (name && !Number.isNaN(rank)) { + info['category2'] = { name, rank }; + } + } + this.channel.emit('item-category-rank-collected', { + asin: params.asin, + ...info, + }); + } + //#endregion + //#region Fetch Goods' Images + const imageUrls = await injector.getImageUrls(); + imageUrls.length > 0 && + this.channel.emit('item-images-collected', { + asin: params.asin, + imageUrls: Array.from(new Set(imageUrls)), + }); + await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds. + //#endregion + //#region Fetch Top Reviews + // const reviews = await injector.getTopReviews(); + // reviews.length > 0 && + // this.channel.emit('item-top-reviews-collected', { + // asin: params.asin, + // topReviews: reviews, + // }); + //#endregion + } + + @withErrorHandling + public async wanderReviewPage(asin: string) { + const url = new URL( + `https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`, + ); + const tab = await this.createNewTab(url.toString()); + const injector = new AmazonReviewPageInjector(tab); + await injector.waitForPageLoad(); + for (let star = 1; star <= 5; star++) { + await injector.showStarsDropDownMenu(); + await injector.selectStar(star); + while (true) { + await injector.waitForPageLoad(); + const reviews = await injector.getSinglePageReviews(); + reviews.length > 0 && this.channel.emit('item-review-collected', { asin, reviews }); + const hasNextPage = await injector.jumpToNextPageIfExist(); + if (!hasNextPage) { + break; + } + } + } + setTimeout(() => browser.tabs.remove(tab.id!), 1000); + } + + public async runSearchPageTask( + keywordsList: string[], + progress?: (remains: string[]) => Promise, + ): Promise { + let remains = [...keywordsList]; + let interrupt = false; + const unsubscribe = this._controlChannel.on('interrupt', () => { + interrupt = true; + }); + while (remains.length > 0 && !interrupt) { + const kw = remains.shift()!; + await this.doSearch(kw); + await this.wanderSearchPage(); + progress && progress(remains); + } + unsubscribe(); + } + + public async runDetaiPageTask( + asins: string[], + progress?: (remains: string[]) => Promise, + ): Promise { + const remains = [...asins]; + let interrupt = false; + const unsubscribe = this._controlChannel.on('interrupt', () => { + interrupt = true; + }); + while (remains.length > 0 && !interrupt) { + const asin = remains.shift()!; + await this.wanderDetailPage(asin); + progress && progress(remains); + } + unsubscribe(); + } + + public async runReviewPageTask( + asins: string[], + progress?: (remains: string[]) => Promise, + ): Promise { + const remains = [...asins]; + let interrupt = false; + const unsubscribe = this._controlChannel.on('interrupt', () => { + interrupt = true; + }); + while (remains.length > 0 && !interrupt) { + const asin = remains.shift()!; + await this.wanderReviewPage(asin); + progress && progress(remains); + } + unsubscribe(); + } + + public stop(): Promise { + return this._controlChannel.emit('interrupt'); + } +} + +export default { + useAmazonPageWorker(): AmazonPageWorker { + return AmazonPageWorkerImpl.getInstance(); + }, +}; diff --git a/src/logic/page-worker/homedepot.ts b/src/logic/page-worker/homedepot.ts new file mode 100644 index 0000000..8dbf648 --- /dev/null +++ b/src/logic/page-worker/homedepot.ts @@ -0,0 +1,75 @@ +import Emittery from 'emittery'; +import { HomedepotEvents, HomedepotWorker } from './types'; +import { Tabs } from 'webextension-polyfill'; +import { isForbiddenUrl } from '~/env'; +import { withErrorHandling } from '../error-handler'; +import { HomedepotDetailPageInjector } from '../web-injectors'; + +class HomedepotWorkerImpl implements HomedepotWorker { + private static _instance: HomedepotWorker | null = null; + public static getInstance() { + if (!HomedepotWorkerImpl._instance) { + HomedepotWorkerImpl._instance = new HomedepotWorkerImpl(); + } + return HomedepotWorkerImpl._instance as HomedepotWorker; + } + private constructor() {} + + readonly channel: Emittery = new Emittery(); + + private readonly _controlChannel = new Emittery<{ interrupt: undefined }>(); + + private async getCurrentTab(): Promise { + const tab = await browser.tabs + .query({ active: true, currentWindow: true }) + .then((tabs) => tabs[0]); + return tab; + } + + private async createNewTab(url?: string): Promise { + const tab = await browser.tabs.create({ url, active: true }); + return tab; + } + + @withErrorHandling + private async wanderingDetailPage(OSMID: string) { + const url = `https://www.homedepot.com/p/${OSMID}`; + let tab = await this.getCurrentTab(); + if (!tab.url || isForbiddenUrl(tab.url)) { + tab = await this.createNewTab(url); + } else { + await browser.tabs.update(tab.id!, { url }); + } + const injector = new HomedepotDetailPageInjector(tab); + await injector.waitForPageLoad(); + const info = await injector.getInfo(); + this.channel.emit('detail-item-collected', { item: { OSMID, ...info } }); + } + + async runDetailPageTask( + OSMIDs: string[], + progress?: (remains: string[]) => Promise | void, + ): Promise { + const remains = [...OSMIDs]; + let interrupt = false; + const unsubscribe = this._controlChannel.on('interrupt', () => { + interrupt = true; + }); + while (remains.length > 0 && !interrupt) { + const OSMIDs = remains.shift()!; + await this.wanderingDetailPage(OSMIDs); + progress && progress(remains); + } + unsubscribe(); + } + + stop(): Promise { + return this._controlChannel.emit('interrupt'); + } +} + +export default { + useHomedepotWorker() { + return HomedepotWorkerImpl.getInstance(); + }, +}; diff --git a/src/logic/page-worker/index.ts b/src/logic/page-worker/index.ts index f32aa4a..579e91a 100644 --- a/src/logic/page-worker/index.ts +++ b/src/logic/page-worker/index.ts @@ -1,285 +1,4 @@ -import Emittery from 'emittery'; -import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types'; -import type { Tabs } from 'webextension-polyfill'; -import { withErrorHandling } from '../error-handler'; -import { - AmazonDetailPageInjector, - AmazonReviewPageInjector, - AmazonSearchPageInjector, -} from '../web-injectors'; +import amazon from './amazon'; +import homedepot from './homedepot'; -/** - * AmazonPageWorkerImpl can run on background & sidepanel & popup, - * **can't** run on content script! - */ -class AmazonPageWorkerImpl implements AmazonPageWorker { - //#region Singleton - private static _instance: AmazonPageWorker | null = null; - public static getInstance() { - if (this._instance === null) { - this._instance = new AmazonPageWorkerImpl(); - } - return this._instance; - } - //#endregion - - private constructor() {} - - private _controlChannel = new Emittery<{ interrupt: undefined }>(); - public readonly channel = new Emittery(); - - private async getCurrentTab(): Promise { - const tab = await browser.tabs - .query({ active: true, currentWindow: true }) - .then((tabs) => tabs[0]); - return tab; - } - - private async createNewTab(url: string): Promise { - const tab = await browser.tabs.create({ - url, - active: true, - }); - return tab; - } - - private async wanderSearchSinglePage(tab: Tabs.Tab) { - const injector = new AmazonSearchPageInjector(tab); - // #region Wait for the Next button to appear, indicating that the product items have finished loading - await injector.waitForPageLoaded(); - // #endregion - // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1 - const pagePattern = await injector.getPagePattern(); - // #endregion - // #region Retrieve key nodes and their information from the critical product search page - const data = await injector.getPageData(pagePattern); - // #endregion - // #region get current page - const page = await injector.getCurrentPage(); - // #endregion - // #region Determine if it is the last page, otherwise navigate to the next page - const hasNextPage = await injector.determineHasNextPage(); - // #endregion - await new Promise((resolve) => setTimeout(resolve, 1000)); - if (data === null || typeof hasNextPage !== 'boolean') { - this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url }); - throw new Error('爬取单页信息失败'); - } - return { data, hasNextPage, page }; - } - - @withErrorHandling - public async doSearch(keywords: string): Promise { - const url = new URL('https://www.amazon.com/s'); - url.searchParams.append('k', keywords); - let tab = await this.getCurrentTab(); - if (!tab.url?.startsWith('http')) { - tab = await this.createNewTab('https://www.amazon.com/'); - tab.url = 'https://www.amazon.com/'; - } - const currentUrl = new URL(tab.url!); - if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) { - tab = await browser.tabs.update(tab.id, { url: url.toString(), active: true }); - await new Promise((resolve) => setTimeout(resolve, 1000)); - } - return url.toString(); - } - - @withErrorHandling - public async wanderSearchPage(): Promise { - let tab = await this.getCurrentTab(); - let offset = 0; - while (true) { - const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab); - const keywords = new URL(tab.url!).searchParams.get('k')!; - const objs = data.map((r, i) => ({ - ...r, - keywords, - page, - rank: offset + 1 + i, - createTime: new Date().toLocaleString(), - asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0], - })); - this.channel.emit('item-links-collected', { objs }); - offset += data.length; - if (!hasNextPage) { - break; - } - } - return new Promise((resolve) => setTimeout(resolve, 1000)); - } - - @withErrorHandling - public async wanderDetailPage(entry: string) { - //#region Initial Meta Info - const params = { asin: '', url: '' }; - if (entry.match(/^https?:\/\/www\.amazon\.com.*\/dp\/[A-Z0-9]{10}/)) { - const [asin] = /\/\/dp\/[A-Z0-9]{10}/.exec(entry)!; - params.asin = asin; - params.url = entry; - } else if (entry.match(/^[A-Z0-9]{10}$/)) { - params.asin = entry; - params.url = `https://www.amazon.com/dp/${entry}`; - } - let tab = await this.getCurrentTab(); - if (!tab.url || !tab.url.startsWith('http')) { - tab = await this.createNewTab(params.url); - } else { - tab = await browser.tabs.update(tab.id, { - url: params.url, - }); - } - const injector = new AmazonDetailPageInjector(tab); - //#endregion - //#region Await Production Introduction Element Loaded - await injector.waitForPageLoaded(); - await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds. - //#endregion - //#region Fetch Base Info - const baseInfo = await injector.getBaseInfo(); - this.channel.emit('item-base-info-collected', { - asin: params.asin, - title: baseInfo.title, - price: baseInfo.price, - }); - //#endregion - //#region Fetch Rating Info - const ratingInfo = await injector.getRatingInfo(); - if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) { - this.channel.emit('item-rating-collected', { - asin: params.asin, - ...ratingInfo, - }); - } - //#endregion - //#region Fetch Category Rank Info - let rawRankingText: string | null = await injector.getRankText(); - if (rawRankingText) { - const info: Pick = {}; - let statement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/.exec(rawRankingText)?.[0]; - if (statement) { - const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/\s\(See\sTop.+\)/, ''); - const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', '')); - if (name && !Number.isNaN(rank)) { - info['category1'] = { name, rank }; - } - rawRankingText = rawRankingText.replace(statement, ''); - } - statement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/.exec(rawRankingText)?.[0]; - if (statement) { - const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/[\s]+$/, ''); - const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', '')); - if (name && !Number.isNaN(rank)) { - info['category2'] = { name, rank }; - } - } - this.channel.emit('item-category-rank-collected', { - asin: params.asin, - ...info, - }); - } - //#endregion - //#region Fetch Goods' Images - const imageUrls = await injector.getImageUrls(); - imageUrls.length > 0 && - this.channel.emit('item-images-collected', { - asin: params.asin, - imageUrls: Array.from(new Set(imageUrls)), - }); - await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds. - //#endregion - //#region Fetch Top Reviews - const reviews = await injector.getTopReviews(); - reviews.length > 0 && - this.channel.emit('item-top-reviews-collected', { - asin: params.asin, - topReviews: reviews, - }); - //#endregion - } - - @withErrorHandling - public async wanderReviewPage(asin: string) { - const url = new URL( - `https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`, - ); - const tab = await this.createNewTab(url.toString()); - const injector = new AmazonReviewPageInjector(tab); - await injector.waitForPageLoad(); - for (let star = 1; star <= 5; star++) { - await injector.showStarsDropDownMenu(); - await injector.selectStar(star); - while (true) { - await injector.waitForPageLoad(); - const reviews = await injector.getSinglePageReviews(); - reviews.length > 0 && this.channel.emit('item-review-collected', { asin, reviews }); - const hasNextPage = await injector.jumpToNextPageIfExist(); - if (!hasNextPage) { - break; - } - } - } - setTimeout(() => browser.tabs.remove(tab.id!), 1000); - } - - public async runSearchPageTask( - keywordsList: string[], - progress?: (remains: string[]) => Promise, - ): Promise { - let remains = [...keywordsList]; - let interrupt = false; - const unsubscribe = this._controlChannel.on('interrupt', () => { - interrupt = true; - }); - while (remains.length > 0 && !interrupt) { - const kw = remains.shift()!; - await this.doSearch(kw); - await this.wanderSearchPage(); - progress && progress(remains); - } - unsubscribe(); - } - - public async runDetaiPageTask( - asins: string[], - progress?: (remains: string[]) => Promise, - ): Promise { - let remains = [...asins]; - let interrupt = false; - const unsubscribe = this._controlChannel.on('interrupt', () => { - interrupt = true; - }); - while (remains.length > 0 && !interrupt) { - const asin = remains.shift()!; - await this.wanderDetailPage(asin); - progress && progress(remains); - } - unsubscribe(); - } - - public async runReviewPageTask( - asins: string[], - progress?: (remains: string[]) => Promise, - ): Promise { - let remains = [...asins]; - while (remains.length > 0) { - const asin = remains.shift()!; - await this.wanderReviewPage(asin); - progress && progress(remains); - } - } - - public async stop(): Promise { - this._controlChannel.emit('interrupt'); - } -} - -class PageWorker { - public useAmazonPageWorker(): AmazonPageWorker { - return AmazonPageWorkerImpl.getInstance(); - } -} - -const pageWorker = new PageWorker(); - -export default pageWorker; +export { amazon, homedepot }; diff --git a/src/logic/page-worker/types.d.ts b/src/logic/page-worker/types.d.ts index a353cbf..0359acc 100644 --- a/src/logic/page-worker/types.d.ts +++ b/src/logic/page-worker/types.d.ts @@ -39,48 +39,64 @@ type AmazonItem = Pick & Partial & Partial & { hasDetail: boolean }; +type HomedepotDetailItem = { + OSMID: string; + link: string; + brandName: string; + title: string; + price: string; + rate: string; + innerText: string; + reviewCount: number; + mainImageUrl: string; +}; + interface AmazonPageWorkerEvents { /** * The event is fired when worker collected links to items on the Amazon search page. */ ['item-links-collected']: { objs: AmazonSearchItem[] }; - /** * The event is fired when worker collected goods' base info on the Amazon detail page. */ ['item-base-info-collected']: Pick; - /** * The event is fired when worker collected goods' rating on the Amazon detail page. */ ['item-rating-collected']: Pick; - /** * The event is fired when worker */ ['item-category-rank-collected']: Pick; - /** * The event is fired when images collected */ ['item-images-collected']: Pick; - /** * The event is fired when top reviews collected in detail page */ ['item-top-reviews-collected']: Pick; - /** * The event is fired when reviews collected in all review page */ ['item-review-collected']: { asin: string; reviews: AmazonReview[] }; - /** * Error event that occurs when there is an issue with the Amazon page worker */ ['error']: { message: string; url?: string }; } +interface HomedepotEvents { + /** + * The event is fired when detail items collect + */ + ['detail-item-collected']: { item: HomedepotDetailItem }; + /** + * The event is fired when error occurs. + */ + ['error']: { message: string; url?: string }; +} + interface AmazonPageWorker { /** * The channel for communication with the Amazon page worker. @@ -120,3 +136,23 @@ interface AmazonPageWorker { */ stop(): Promise; } + +interface HomedepotWorker { + /** + * The channel for communication with the Homedepot page worker. + */ + readonly channel: Emittery; + + /** + * Browsing goods detail page and collect target information + */ + runDetailPageTask( + OSMIDs: string[], + progress?: (remains: string[]) => Promise | void, + ): Promise; + + /** + * Stop the worker. + */ + stop(): Promise; +} diff --git a/src/logic/web-injectors.ts b/src/logic/web-injectors.ts index 452609d..a9d77da 100644 --- a/src/logic/web-injectors.ts +++ b/src/logic/web-injectors.ts @@ -1,6 +1,6 @@ import { exec } from './execute-script'; import type { Tabs } from 'webextension-polyfill'; -import type { AmazonReview, AmazonSearchItem } from './page-worker/types'; +import type { AmazonReview, AmazonSearchItem, HomedepotDetailItem } from './page-worker/types'; class BaseInjector { readonly _tab: Tabs.Tab; @@ -235,40 +235,24 @@ export class AmazonDetailPageInjector extends BaseInjector { public async getImageUrls() { return this.run(async () => { - let urls = Array.from(document.querySelectorAll('.imageThumbnail img')).map( - (e) => e.src, - ); - //#region process more images https://github.com/primedigitaltech/azon_seeker/issues/4 const overlay = document.querySelector('.overlayRestOfImages'); if (overlay) { if (document.querySelector('#ivThumbs')!.getClientRects().length === 0) { overlay.click(); await new Promise((resolve) => setTimeout(resolve, 1000)); } - urls = Array.from( - document.querySelectorAll('#ivThumbs .ivThumbImage[style]'), - ).map((e) => e.style.background); - urls = urls.map((s) => { - const [url] = /(?<=url\(").+(?=")/.exec(s)!; - return url; - }); - await new Promise((resolve) => setTimeout(resolve, 1000)); - document - .querySelector(".a-popover button[data-action='a-popover-close']") - ?.click(); } - //#endregion - //#region post-process image urls - urls = urls.map((rawUrl) => { - const imgUrl = new URL(rawUrl); - const paths = imgUrl.pathname.split('/'); - const chunks = paths[paths.length - 1].split('.'); - const [name, ext] = [chunks[0], chunks[chunks.length - 1]]; - paths[paths.length - 1] = `${name}.${ext}`; - imgUrl.pathname = paths.join('/'); - return imgUrl.toString(); - }); - //#endregion + const script = document.evaluate( + `//script[starts-with(text(), "\nP.when(\'A\').register")]`, + document, + null, + XPathResult.STRING_TYPE, + ).stringValue; + const urls = [ + ...script.matchAll( + /(?<="hiRes":")https:\/\/m.media-amazon.com\/images\/I\/[\w\d\.\-+]+(?=")/g, + ), + ].map((e) => e[0]); return urls; }); } @@ -372,8 +356,6 @@ export class AmazonReviewPageInjector extends BaseInjector { ); const items: AmazonReview[] = []; for (let i = 0; i < xResult.snapshotLength; i++) { - console.log('handling', i); - const commentNode = xResult.snapshotItem(i) as HTMLDivElement; if (!commentNode) { continue; @@ -459,3 +441,64 @@ export class AmazonReviewPageInjector extends BaseInjector { ); } } + +export class HomedepotDetailPageInjector extends BaseInjector { + public waitForPageLoad() { + return this.run(async () => { + while (true) { + document + .querySelector( + `#product-section-overview div[role='button'][aria-expanded='false']`, + ) + ?.click(); + const reviewPlaceholderEl = document.querySelector( + `[data-component^="ratings-and-reviews"] [class^="placeholder"]`, + ); + reviewPlaceholderEl?.scrollIntoView({ behavior: 'smooth' }); + if (document.readyState === 'complete' && !reviewPlaceholderEl) { + await new Promise((resolve) => setTimeout(resolve, 1000)); + document + .querySelector(`#product-section-rr`) + ?.scrollIntoView({ behavior: 'smooth', block: 'center' }); + break; + } + } + }); + } + + public getInfo() { + return this.run(async () => { + const link = document.location.toString(); + const brandName = document.querySelector( + `[data-component^="product-details:ProductDetailsBrandCollection"]`, + )!.innerText; + const title = document.querySelector( + `[data-component^="product-details:ProductDetailsTitle"]`, + )!.innerText; + const price = document.querySelector(`#standard-price`)!.innerText; + const rate = /\d\.\d/.exec( + document.querySelector(`[data-component^="ratings-and-reviews"] .sui-mr-1`)! + .innerText, + )![0]; + const reviewCount = Number( + /[\d]+/.exec( + document.querySelector( + `[data-component^="ratings-and-reviews"] button > span:last-child`, + )!.innerText, + )![0], + ); + const mainImageUrl = document.querySelector( + `.mediagallery__mainimage img`, + )!.src; + return { + link, + brandName, + title, + price, + rate, + reviewCount, + mainImageUrl, + } as Omit; + }); + } +} diff --git a/src/sidepanel/DetailPageEntry.vue b/src/sidepanel/DetailPageEntry.vue index f0c8ca3..dc11852 100644 --- a/src/sidepanel/DetailPageEntry.vue +++ b/src/sidepanel/DetailPageEntry.vue @@ -1,6 +1,6 @@