diff --git a/src/components/DetailDescription.vue b/src/components/DetailDescription.vue index 0efec53..d316721 100644 --- a/src/components/DetailDescription.vue +++ b/src/components/DetailDescription.vue @@ -33,12 +33,14 @@ const props = defineProps<{ model: AmazonDetailItem }>(); {{ link }} - +
-
{{ review.username }}:
+

{{ review.username }}: {{ review.title }}

+
{{ review.rating }}
{{ paragraph }}
+
{{ review.dateInfo }}
diff --git a/src/components/ResultTable.vue b/src/components/ResultTable.vue index 2965570..23b698a 100644 --- a/src/components/ResultTable.vue +++ b/src/components/ResultTable.vue @@ -24,7 +24,7 @@ const filterFormItems = computed(() => { params: { options: [ ...records.reduce((o, c) => { - o.add(c.keywords); + c.keywords && o.add(c.keywords); return o; }, new Set()), ].map((opt) => ({ @@ -67,6 +67,11 @@ const columns: (TableColumn & { hidden?: boolean })[] = [ key: 'rank', minWidth: 60, }, + { + title: 'ASIN', + key: 'asin', + minWidth: 130, + }, { title: '标题', key: 'title', @@ -75,9 +80,9 @@ const columns: (TableColumn & { hidden?: boolean })[] = [ }, }, { - title: 'ASIN', - key: 'asin', - minWidth: 130, + title: '价格', + key: 'price', + minWidth: 100, }, { title: '封面图', @@ -141,6 +146,12 @@ const extraHeaders: Header[] = [ formatOutputValue: (val?: string[]) => val?.join(';'), parseImportValue: (val?: string) => val?.split(';'), }, + { + prop: 'topReviews', + label: '精选评论', + formatOutputValue: (val?: Record[]) => JSON.stringify(val), + parseImportValue: (val?: string) => val && JSON.parse(val), + }, ]; const filterItemData = (data: AmazonItem[]): AmazonItem[] => { @@ -148,7 +159,7 @@ const filterItemData = (data: AmazonItem[]): AmazonItem[] => { if (search.trim() !== '') { data = data.filter((r) => { return [r.title, r.asin, r.keywords].some((field) => - field.toLowerCase().includes(search.toLowerCase()), + field?.toLowerCase().includes(search.toLowerCase()), ); }); } diff --git a/src/logic/page-worker/index.ts b/src/logic/page-worker/index.ts index b5844c5..2ea7e2f 100644 --- a/src/logic/page-worker/index.ts +++ b/src/logic/page-worker/index.ts @@ -2,7 +2,11 @@ import Emittery from 'emittery'; import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types'; import type { Tabs } from 'webextension-polyfill'; import { withErrorHandling } from '../error-handler'; -import { AmazonDetailPageInjector, AmazonSearchPageInjector } from '../web-injectors'; +import { + AmazonDetailPageInjector, + AmazonReviewPageInjector, + AmazonSearchPageInjector, +} from '../web-injectors'; /** * AmazonPageWorkerImpl can run on background & sidepanel & popup, @@ -75,7 +79,7 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { } const currentUrl = new URL(tab.url!); if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) { - await browser.tabs.update(tab.id, { url: url.toString() }); + tab = await browser.tabs.update(tab.id, { url: url.toString(), active: true }); await new Promise((resolve) => setTimeout(resolve, 1000)); } return url.toString(); @@ -127,10 +131,18 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { } const injector = new AmazonDetailPageInjector(tab); //#endregion - //#region Await Production Introduction Element Loaded and Determine Page Pattern + //#region Await Production Introduction Element Loaded await injector.waitForPageLoaded(); await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds. //#endregion + //#region Fetch Base Info + const baseInfo = await injector.getBaseInfo(); + this.channel.emit('item-base-info-collected', { + asin: params.asin, + title: baseInfo.title, + price: baseInfo.price, + }); + //#endregion //#region Fetch Rating Info const ratingInfo = await injector.getRatingInfo(); if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) { @@ -181,11 +193,28 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { reviews.length > 0 && this.channel.emit('item-top-reviews-collected', { asin: params.asin, - topReviews: reviews.map((r) => ({ asin: params.asin, ...r })), + topReviews: reviews, }); //#endregion } + @withErrorHandling + public async wanderReviewPage(asin: string) { + const baseUrl = `https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`; + const tab = await this.createNewTab(baseUrl); + const injector = new AmazonReviewPageInjector(tab); + while (true) { + await injector.waitForPageLoad(); + const reviews = await injector.getSinglePageReviews(); + reviews.length > 0 && this.channel.emit('item-review-collected', { asin, reviews }); + const hasNextPage = await injector.jumpToNextPageIfExist(); + if (!hasNextPage) { + break; + } + } + setTimeout(() => browser.tabs.remove(tab.id!), 1000); + } + public async runSearchPageTask( keywordsList: string[], progress?: (remains: string[]) => Promise, @@ -221,17 +250,29 @@ class AmazonPageWorkerImpl implements AmazonPageWorker { unsubscribe(); } + public async runReviewPageTask( + asins: string[], + progress?: (remains: string[]) => Promise, + ): Promise { + let remains = [...asins]; + while (remains.length > 0) { + const asin = remains.shift()!; + await this.wanderReviewPage(asin); + progress && progress(remains); + } + } + public async stop(): Promise { this._controlChannel.emit('interrupt'); } } -class PageWorkerFactory { +class PageWorker { public useAmazonPageWorker(): AmazonPageWorker { return AmazonPageWorkerImpl.getInstance(); } } -const pageWorkerFactory = new PageWorkerFactory(); +const pageWorker = new PageWorker(); -export default pageWorkerFactory; +export default pageWorker; diff --git a/src/logic/page-worker/types.d.ts b/src/logic/page-worker/types.d.ts index 6e7805d..971ea9a 100644 --- a/src/logic/page-worker/types.d.ts +++ b/src/logic/page-worker/types.d.ts @@ -8,12 +8,15 @@ type AmazonSearchItem = { title: string; asin: string; rank: number; + price?: string; imageSrc: string; createTime: string; }; type AmazonDetailItem = { asin: string; + title: string; + price?: string; rating?: number; ratingCount?: number; category1?: { name: string; rank: number }; @@ -23,7 +26,7 @@ type AmazonDetailItem = { }; type AmazonReview = { - asin: string; + id: string; username: string; title: string; rating: string; @@ -31,7 +34,9 @@ type AmazonReview = { content: string; }; -type AmazonItem = AmazonSearchItem & Partial & { hasDetail: boolean }; +type AmazonItem = Pick & + Partial & + Partial & { hasDetail: boolean }; interface AmazonPageWorkerEvents { /** @@ -39,6 +44,11 @@ interface AmazonPageWorkerEvents { */ ['item-links-collected']: { objs: AmazonSearchItem[] }; + /** + * The event is fired when worker collected goods' base info on the Amazon detail page. + */ + ['item-base-info-collected']: Pick; + /** * The event is fired when worker collected goods' rating on the Amazon detail page. */ @@ -55,12 +65,17 @@ interface AmazonPageWorkerEvents { ['item-images-collected']: Pick; /** - * The event is fired when top reviews collected + * The event is fired when top reviews collected in detail page */ ['item-top-reviews-collected']: Pick; /** - * Error event that occurs when there is an issue with the Amazon page worker. + * The event is fired when reviews collected in all review page + */ + ['item-review-collected']: { asin: string; reviews: AmazonReview[] }; + + /** + * Error event that occurs when there is an issue with the Amazon page worker */ ['error']: { message: string; url?: string }; } @@ -89,6 +104,16 @@ interface AmazonPageWorker { */ runDetaiPageTask(asins: string[], progress?: (remains: string[]) => Promise): Promise; + /** + * Browsing goods review page and collect target information. + * @param asins Amazon Standard Identification Numbers. + * @param progress The callback that receive remaining asins as the parameter. + */ + runReviewPageTask( + asins: string[], + progress?: (remains: string[]) => Promise, + ): Promise; + /** * Stop the worker. */ diff --git a/src/logic/storage.ts b/src/logic/storage.ts index f06ca1d..e755f1c 100644 --- a/src/logic/storage.ts +++ b/src/logic/storage.ts @@ -7,27 +7,32 @@ export const asinInputText = useWebExtensionStorage('asinInputText', '') export const searchItems = useWebExtensionStorage('searchItems', []); -export const detailItems = useWebExtensionStorage('detailItems', []); +export const detailItems = useWebExtensionStorage>( + 'detailItems', + new Map(), + { + listenToStorageChanges: false, + }, +); export const allItems = computed({ get() { const sItems = searchItems.value; - const dItems = detailItems.value.reduce>( - (m, c) => (m.set(c.asin, c), m), - new Map(), - ); + const dItems = detailItems.value; return sItems.map((si) => { const asin = si.asin; const dItem = dItems.get(asin); - return dItem ? { ...si, ...dItem, hasDetail: true } : { ...si, hasDetail: false }; + return dItems.has(asin) ? { ...si, ...dItem, hasDetail: true } : { ...si, hasDetail: false }; }); }, set(newValue) { const searchItemProps: (keyof AmazonSearchItem)[] = [ 'keywords', 'asin', + 'page', 'title', 'imageSrc', + 'price', 'link', 'rank', 'createTime', @@ -45,14 +50,17 @@ export const allItems = computed({ 'imageUrls', 'rating', 'ratingCount', + 'topReviews', ]; detailItems.value = newValue .filter((row) => row.hasDetail) - .map((row) => { - const entries: [string, unknown][] = Object.entries(row).filter(([key]) => + .reduce>((m, row) => { + const entries = Object.entries(row).filter(([key]) => detailItemsProps.includes(key as keyof AmazonDetailItem), ); - return Object.fromEntries(entries) as AmazonSearchItem; - }); + const obj = Object.fromEntries(entries) as AmazonDetailItem; + m.set(obj.asin, obj); + return m; + }, new Map()); }, }); diff --git a/src/logic/web-injectors.ts b/src/logic/web-injectors.ts index af08aa8..6ff47c5 100644 --- a/src/logic/web-injectors.ts +++ b/src/logic/web-injectors.ts @@ -1,16 +1,22 @@ import { exec } from './execute-script'; import type { Tabs } from 'webextension-polyfill'; -import type { AmazonReview } from './page-worker/types'; +import type { AmazonReview, AmazonSearchItem } from './page-worker/types'; -export class AmazonSearchPageInjector { +class BaseInjector { readonly _tab: Tabs.Tab; constructor(tab: Tabs.Tab) { this._tab = tab; } - public async waitForPageLoaded() { - return exec(this._tab.id!, async () => { + run(func: (payload?: any) => Promise, payload?: any): Promise { + return exec(this._tab.id!, func, payload); + } +} + +export class AmazonSearchPageInjector extends BaseInjector { + public waitForPageLoaded() { + return this.run(async () => { await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random()))); while (true) { const targetNode = document.querySelector('.s-pagination-next'); @@ -34,61 +40,81 @@ export class AmazonSearchPageInjector { } public async getPagePattern() { - return exec(this._tab.id!, async () => { - return [ - ...(document.querySelectorAll( + return this.run(async () => { + return Array.from( + document.querySelectorAll( '.puisg-row:has(.a-section.a-spacing-small.a-spacing-top-small:not(.a-text-right))', - ) as unknown as HTMLDivElement[]), - ].filter((e) => e.getClientRects().length > 0).length > 0 + ), + ).filter((e) => e.getClientRects().length > 0).length > 0 ? 'pattern-1' : 'pattern-2'; }); } public async getPageData(pattern: 'pattern-1' | 'pattern-2') { - let data: { link: string; title: string; imageSrc: string }[] | null = null; + let data: Pick[] | null = null; switch (pattern) { // 处理商品以列表形式展示的情况 case 'pattern-1': - data = await exec(this._tab.id!, async () => { + data = await this.run(async () => { const items = Array.from( document.querySelectorAll( '.puisg-row:has(.a-section.a-spacing-small.a-spacing-top-small:not(.a-text-right))', ), ).filter((e) => e.getClientRects().length > 0); - const linkObjs = items.reduce<{ link: string; title: string; imageSrc: string }[]>( - (objs, el) => { - const link = el.querySelector('a')?.href; - const title = el - .querySelector('h2.a-color-base')! - .getAttribute('aria-label')!; - const imageSrc = el.querySelector('img.s-image')!.src!; - link && objs.push({ link, title, imageSrc }); - return objs; - }, - [], - ); + const linkObjs = items.reduce< + Pick[] + >((objs, el) => { + const link = el.querySelector('a')?.href; + const title = el + .querySelector('h2.a-color-base')! + .getAttribute('aria-label')!; + const imageSrc = el.querySelector('img.s-image')!.src!; + const price = + el.querySelector('.a-price:not(.a-text-price) .a-offscreen') + ?.innerText || + ( + document.evaluate( + `.//div[@data-cy="secondary-offer-recipe"]//span[@class='a-color-base' and contains(., '$') and not(*)]`, + el, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + ).singleNodeValue as HTMLSpanElement | null + )?.innerText; + link && objs.push({ link, title, imageSrc, price }); + return objs; + }, []); return linkObjs; }); break; // 处理商品以二维图片格展示的情况 case 'pattern-2': - data = await exec(this._tab.id!, async () => { + data = await this.run(async () => { const items = Array.from( document.querySelectorAll( '.puis-card-container:has(.a-section.a-spacing-small.puis-padding-left-small)', ) as unknown as HTMLDivElement[], ).filter((e) => e.getClientRects().length > 0); - const linkObjs = items.reduce<{ link: string; title: string; imageSrc: string }[]>( - (objs, el) => { - const link = el.querySelector('a.a-link-normal')?.href; - const title = el.querySelector('h2.a-color-base')!.innerText; - const imageSrc = el.querySelector('img.s-image')!.src!; - link && objs.push({ link, title, imageSrc }); - return objs; - }, - [], - ); + const linkObjs = items.reduce< + Pick[] + >((objs, el) => { + const link = el.querySelector('a.a-link-normal')?.href; + const title = el.querySelector('h2.a-color-base')!.innerText; + const imageSrc = el.querySelector('img.s-image')!.src!; + const price = + el.querySelector('.a-price:not(.a-text-price) .a-offscreen') + ?.innerText || + ( + document.evaluate( + `.//div[@data-cy="secondary-offer-recipe"]//span[@class='a-color-base' and contains(., '$') and not(*)]`, + el, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + ).singleNodeValue as HTMLSpanElement | null + )?.innerText; + link && objs.push({ link, title, imageSrc, price }); + return objs; + }, []); return linkObjs; }); break; @@ -100,7 +126,7 @@ export class AmazonSearchPageInjector { } public async getCurrentPage() { - return exec(this._tab.id!, async () => { + return this.run(async () => { const node = document.querySelector( '.s-pagination-item.s-pagination-selected', ); @@ -109,7 +135,7 @@ export class AmazonSearchPageInjector { } public async determineHasNextPage() { - return exec(this._tab.id!, async () => { + return this.run(async () => { const nextButton = document.querySelector('.s-pagination-next'); if (nextButton) { if (!nextButton.classList.contains('s-pagination-disabled')) { @@ -126,15 +152,9 @@ export class AmazonSearchPageInjector { } } -export class AmazonDetailPageInjector { - readonly _tab: Tabs.Tab; - - constructor(tab: Tabs.Tab) { - this._tab = tab; - } - +export class AmazonDetailPageInjector extends BaseInjector { public async waitForPageLoaded() { - return exec(this._tab.id!, async () => { + return this.run(async () => { while (true) { window.scrollBy(0, ~~(Math.random() * 500) + 500); await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 100) + 200)); @@ -156,8 +176,18 @@ export class AmazonDetailPageInjector { }); } + public async getBaseInfo() { + return this.run(async () => { + const title = document.querySelector('#title')!.innerText; + const price = document.querySelector( + '.a-price:not(.a-text-price) .a-offscreen', + )?.innerText; + return { title, price }; + }); + } + public async getRatingInfo() { - return await exec(this._tab.id!, async () => { + return this.run(async () => { const review = document.querySelector('#averageCustomerReviews'); const rating = Number( review?.querySelector('#acrPopover')?.getAttribute('title')?.split(' ')[0], @@ -177,9 +207,9 @@ export class AmazonDetailPageInjector { } public async getRankText() { - return exec(this._tab.id!, async () => { + return this.run(async () => { const xpathExps = [ - `//div[@id='detailBulletsWrapper_feature_div']//ul[.//li[contains(., 'Best Sellers Rank')]]//span[@class='a-list-item']`, + `//div[@id='detailBulletsWrapper_feature_div']//ul[.//li[contains(., 'Best Sellers Rank')]]//span[@class='a-list-item' and contains(., 'Best Sellers Rank')]`, `//div[@id='prodDetails']//table/tbody/tr[th[1][contains(text(), 'Best Sellers Rank')]]/td`, `//div[@id='productDetails_db_sections']//table/tbody/tr[th[1][contains(text(), 'Best Sellers Rank')]]/td`, ]; @@ -201,7 +231,7 @@ export class AmazonDetailPageInjector { } public async getImageUrls() { - return exec(this._tab.id!, async () => { + return this.run(async () => { let urls = Array.from(document.querySelectorAll('.imageThumbnail img')).map( (e) => e.src, ); @@ -212,11 +242,9 @@ export class AmazonDetailPageInjector { overlay.click(); await new Promise((resolve) => setTimeout(resolve, 1000)); } - urls = [ - ...(document.querySelectorAll( - '#ivThumbs .ivThumbImage[style]', - ) as unknown as HTMLDivElement[]), - ].map((e) => e.style.background); + urls = Array.from( + document.querySelectorAll('#ivThumbs .ivThumbImage[style]'), + ).map((e) => e.style.background); urls = urls.map((s) => { const [url] = /(?<=url\(").+(?=")/.exec(s)!; return url; @@ -243,7 +271,7 @@ export class AmazonDetailPageInjector { } public async getTopReviews() { - return exec[]>(this._tab.id!, async () => { + return this.run(async () => { const targetNode = document.querySelector('.cr-widget-FocalReviews'); if (!targetNode) { return []; @@ -258,18 +286,19 @@ export class AmazonDetailPageInjector { null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, ); - const items: Omit[] = []; + const items: AmazonReview[] = []; for (let i = 0; i < xResult.snapshotLength; i++) { const commentNode = xResult.snapshotItem(i) as HTMLDivElement | null; if (!commentNode) { continue; } + const id = commentNode.id.split('-')[0]; const username = commentNode.querySelector('.a-profile-name')!.innerText; const title = commentNode.querySelector( '[data-hook="review-title"] > span:not(.a-letter-space)', )!.innerText; const rating = commentNode.querySelector( - '[data-hook="review-star-rating"]', + '[data-hook*="review-star-rating"]', )!.innerText; const dateInfo = commentNode.querySelector( '[data-hook="review-date"]', @@ -277,9 +306,101 @@ export class AmazonDetailPageInjector { const content = commentNode.querySelector( '[data-hook="review-body"]', )!.innerText; - items.push({ username, title, rating, dateInfo, content }); + items.push({ id, username, title, rating, dateInfo, content }); } return items; }); } } + +export class AmazonReviewPageInjector extends BaseInjector { + public async waitForPageLoad() { + return this.run(async () => { + await new Promise((resolve) => setTimeout(resolve, 1000)); + while (true) { + const targetNode = document.querySelector( + '#cm_cr-review_list .reviews-content,ul[role="list"]:not(.histogram)', + ); + targetNode?.scrollIntoView({ behavior: 'smooth', block: 'center' }); + if ( + targetNode && + targetNode.getClientRects().length > 0 && + document.readyState !== 'loading' + ) { + break; + } + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + while (true) { + const loadingNode = document.querySelector('.reviews-loading'); + if (loadingNode && loadingNode.getClientRects().length === 0) { + break; + } + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + }); + } + + public async getSinglePageReviews() { + return this.run(async () => { + const targetNode = document.querySelector('#cm_cr-review_list'); + if (!targetNode) { + return []; + } + // targetNode.scrollIntoView({ behavior: "smooth", block: "end" }) + await new Promise((resolve) => setTimeout(resolve, 1000)); + const xResult = document.evaluate( + `.//div[contains(@id, 'review-card')]`, + targetNode, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + ); + const items: AmazonReview[] = []; + for (let i = 0; i < xResult.snapshotLength; i++) { + console.log('handling', i); + + const commentNode = xResult.snapshotItem(i) as HTMLDivElement; + if (!commentNode) { + continue; + } + const id = commentNode.id.split('-')[0]; + const username = commentNode.querySelector('.a-profile-name')!.innerText; + const title = commentNode.querySelector( + '[data-hook="review-title"] > span:not(.a-letter-space)', + )!.innerText; + const rating = commentNode.querySelector( + '[data-hook*="review-star-rating"]', + )!.innerText; + const dateInfo = commentNode.querySelector( + '[data-hook="review-date"]', + )!.innerText; + const content = commentNode.querySelector( + '[data-hook="review-body"]', + )!.innerText; + items.push({ id, username, title, rating, dateInfo, content }); + } + return items; + }); + } + + public jumpToNextPageIfExist() { + return this.run(async () => { + const latestReview = document.evaluate( + `//*[@id='cm_cr-review_list']//li[@data-hook='review'][last()]`, + document.body, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + ).singleNodeValue as HTMLElement | null; + latestReview?.scrollIntoView({ behavior: 'smooth', block: 'center' }); + await new Promise((resolve) => setTimeout(resolve, 1000)); + const nextPageNode = document.querySelector( + '[data-hook="pagination-bar"] .a-pagination > *:nth-of-type(2)', + ); + nextPageNode?.scrollIntoView({ behavior: 'smooth', block: 'center' }); + await new Promise((resolve) => setTimeout(resolve, 1000)); + const ret = nextPageNode && !nextPageNode.classList.contains('a-disabled'); + ret && nextPageNode?.querySelector('a')?.click(); + return ret; + }); + } +} diff --git a/src/options/Options.vue b/src/options/Options.vue index f9eb1f3..f56ce6a 100644 --- a/src/options/Options.vue +++ b/src/options/Options.vue @@ -14,7 +14,7 @@ main { align-items: center; .result-table { - width: 90%; + width: 95%; } } diff --git a/src/sidepanel/DetailPageEntry.vue b/src/sidepanel/DetailPageEntry.vue index 2dccaba..7798983 100644 --- a/src/sidepanel/DetailPageEntry.vue +++ b/src/sidepanel/DetailPageEntry.vue @@ -1,6 +1,6 @@