This commit is contained in:
johnathan 2025-06-12 10:06:25 +08:00
parent 74315474f8
commit a240242331
10 changed files with 504 additions and 326 deletions

View File

@ -0,0 +1,21 @@
/**
 * Vue composable that tracks the URL of the active tab in the current window.
 *
 * The URL is read once when the component mounts and is refreshed whenever
 * `browser.tabs.onUpdated` or `browser.tabs.onHighlighted` fires. Both
 * listeners are removed again when the component unmounts.
 *
 * @returns An object holding `currentUrl`, a ref that is `''` when the query
 * yields no tab or the tab exposes no URL.
 */
export function useCurrentUrl() {
  const currentUrl = ref('');

  const updateUrl = async () => {
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    // The query may return an empty array; fall back to '' instead of
    // dereferencing an undefined tab.
    currentUrl.value = tab?.url ?? '';
  };

  onMounted(() => {
    updateUrl();
    browser.tabs.onUpdated.addListener(updateUrl);
    browser.tabs.onHighlighted.addListener(updateUrl);
  });

  onUnmounted(() => {
    browser.tabs.onUpdated.removeListener(updateUrl);
    browser.tabs.onHighlighted.removeListener(updateUrl);
  });

  return { currentUrl };
}

View File

@ -1,4 +1,4 @@
/**
 * Reports which extension page the code is running in, taken from index 2 of
 * `document.location.pathname` split on `/`.
 *
 * @returns `{ pageContext }`, typed as `'sidepanel' | 'options'`; the cast is
 * a compile-time assertion only and is not checked at runtime.
 */
export const usePageContext = () => {
export function usePageContext() {
const pageContext = document.location.pathname.split('/')[2] as 'sidepanel' | 'options';
return { pageContext };
};
}

View File

@ -0,0 +1,284 @@
import Emittery from 'emittery';
import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types';
import type { Tabs } from 'webextension-polyfill';
import { withErrorHandling } from '../error-handler';
import {
AmazonDetailPageInjector,
AmazonReviewPageInjector,
AmazonSearchPageInjector,
} from '../web-injectors';
import { isForbiddenUrl } from '~/env';
/**
 * Singleton worker that drives a browser tab through Amazon search, detail and
 * review pages and publishes whatever it collects on {@link channel}.
 *
 * AmazonPageWorkerImpl can run on background & sidepanel & popup,
 * **can't** run on content script!
 */
class AmazonPageWorkerImpl implements AmazonPageWorker {
  //#region Singleton
  private static _instance: AmazonPageWorker | null = null;

  /** Returns the shared instance, creating it on first use. */
  public static getInstance() {
    if (this._instance === null) {
      this._instance = new AmazonPageWorkerImpl();
    }
    return this._instance;
  }
  //#endregion

  private constructor() {}

  /** Internal channel; {@link stop} emits `interrupt` here to end a running task loop. */
  private readonly _controlChannel = new Emittery<{ interrupt: undefined }>();

  /** Channel on which collected data and errors are emitted. */
  public readonly channel = new Emittery<AmazonPageWorkerEvents>();

  /** Resolves to the first tab returned for "active tab of the current window". */
  private async getCurrentTab(): Promise<Tabs.Tab> {
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    return tab;
  }

  /** Opens `url` in a new, focused tab. */
  private async createNewTab(url: string): Promise<Tabs.Tab> {
    return browser.tabs.create({ url, active: true });
  }

  /**
   * Scrapes one search-result page in `tab`.
   *
   * @returns The page's items, its page number and whether a next page exists.
   * @throws Error when the page data is `null` or the next-page probe does not
   * return a boolean; an `error` event is emitted first.
   */
  private async wanderSearchSinglePage(tab: Tabs.Tab) {
    const injector = new AmazonSearchPageInjector(tab);
    // #region Wait for the Next button to appear, indicating that the product items have finished loading
    await injector.waitForPageLoaded();
    // #endregion
    // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1
    const pagePattern = await injector.getPagePattern();
    // #endregion
    // #region Retrieve key nodes and their information from the critical product search page
    const data = await injector.getPageData(pagePattern);
    // #endregion
    // #region get current page
    const page = await injector.getCurrentPage();
    // #endregion
    // #region Determine if it is the last page, otherwise navigate to the next page
    const hasNextPage = await injector.determineHasNextPage();
    // #endregion
    await new Promise((resolve) => setTimeout(resolve, 1000));
    if (data === null || typeof hasNextPage !== 'boolean') {
      this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url });
      throw new Error('爬取单页信息失败');
    }
    return { data, hasNextPage, page };
  }

  /**
   * Points the current tab (or a fresh one when the current tab is not an
   * http(s) page) at the Amazon search results for `keywords`.
   *
   * @returns The search URL that was requested.
   */
  @withErrorHandling
  public async doSearch(keywords: string): Promise<string> {
    const url = new URL('https://www.amazon.com/s');
    url.searchParams.append('k', keywords);
    let tab = await this.getCurrentTab();
    if (!tab.url?.startsWith('http')) {
      tab = await this.createNewTab('https://www.amazon.com/');
      // Pin the URL on the local tab object so the comparison below has a value to parse.
      tab.url = 'https://www.amazon.com/';
    }
    const currentUrl = new URL(tab.url!);
    if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) {
      tab = await browser.tabs.update(tab.id, { url: url.toString(), active: true });
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
    }
    return url.toString();
  }

  /**
   * Walks every result page of the search shown in the current tab and emits
   * one `item-links-collected` event per page. `rank` is the 1-based position
   * of an item across all pages visited so far.
   */
  @withErrorHandling
  public async wanderSearchPage(): Promise<void> {
    const tab = await this.getCurrentTab();
    let offset = 0;
    while (true) {
      const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab);
      const keywords = new URL(tab.url!).searchParams.get('k')!;
      const objs = data.map((r, i) => ({
        ...r,
        keywords,
        page,
        rank: offset + 1 + i,
        createTime: new Date().toLocaleString(),
        asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0],
      }));
      this.channel.emit('item-links-collected', { objs });
      offset += data.length;
      if (!hasNextPage) {
        break;
      }
    }
    return new Promise((resolve) => setTimeout(resolve, 1000));
  }

  /**
   * Opens one product detail page and emits its base info, rating, category
   * ranks and image URLs as separate events.
   *
   * @param entry Either a `www.amazon.com` product URL containing `/dp/<ASIN>`
   * or a bare 10-character ASIN.
   */
  @withErrorHandling
  public async wanderDetailPage(entry: string) {
    //#region Initial Meta Info
    const params = { asin: '', url: '' };
    if (entry.match(/^https?:\/\/www\.amazon\.com.*\/dp\/[A-Z0-9]{10}/)) {
      // Look-behind keeps only the 10-character ASIN that follows "/dp/";
      // the guard above guarantees a match.
      const [asin] = /(?<=\/dp\/)[A-Z0-9]{10}/.exec(entry)!;
      params.asin = asin;
      params.url = entry;
    } else if (entry.match(/^[A-Z0-9]{10}$/)) {
      params.asin = entry;
      params.url = `https://www.amazon.com/dp/${entry}`;
    }
    let tab = await this.getCurrentTab();
    if (!tab.url || isForbiddenUrl(tab.url)) {
      tab = await this.createNewTab(params.url);
    } else {
      tab = await browser.tabs.update(tab.id, {
        url: params.url,
      });
    }
    const injector = new AmazonDetailPageInjector(tab);
    //#endregion
    //#region Await Production Introduction Element Loaded
    await injector.waitForPageLoaded();
    await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds.
    //#endregion
    //#region Fetch Base Info
    const baseInfo = await injector.getBaseInfo();
    this.channel.emit('item-base-info-collected', {
      asin: params.asin,
      title: baseInfo.title,
      price: baseInfo.price,
    });
    //#endregion
    //#region Fetch Rating Info
    const ratingInfo = await injector.getRatingInfo();
    if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) {
      this.channel.emit('item-rating-collected', {
        asin: params.asin,
        ...ratingInfo,
      });
    }
    //#endregion
    //#region Fetch Category Rank Info
    let rawRankingText: string | null = await injector.getRankText();
    if (rawRankingText) {
      const info: Pick<AmazonDetailItem, 'category1' | 'category2'> = {};
      // One "#<rank> in <category>" statement; applied twice below.
      const rankStatement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/;
      let statement = rankStatement.exec(rawRankingText)?.[0];
      if (statement) {
        const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/\s\(See\sTop.+\)/, '');
        const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
        if (name && !Number.isNaN(rank)) {
          info['category1'] = { name, rank };
        }
        // Remove the first statement so the second search finds the next one.
        rawRankingText = rawRankingText.replace(statement, '');
      }
      statement = rankStatement.exec(rawRankingText)?.[0];
      if (statement) {
        const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/[\s]+$/, '');
        const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
        if (name && !Number.isNaN(rank)) {
          info['category2'] = { name, rank };
        }
      }
      this.channel.emit('item-category-rank-collected', {
        asin: params.asin,
        ...info,
      });
    }
    //#endregion
    //#region Fetch Goods' Images
    const imageUrls = await injector.getImageUrls();
    if (imageUrls.length > 0) {
      this.channel.emit('item-images-collected', {
        asin: params.asin,
        imageUrls: Array.from(new Set(imageUrls)),
      });
    }
    await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds.
    //#endregion
    //#region Fetch Top Reviews
    // const reviews = await injector.getTopReviews();
    // reviews.length > 0 &&
    //   this.channel.emit('item-top-reviews-collected', {
    //     asin: params.asin,
    //     topReviews: reviews,
    //   });
    //#endregion
  }

  /**
   * Opens the all-reviews page for `asin` in a new tab, iterates the star
   * filter from 1 to 5 and pages through each filter, emitting
   * `item-review-collected` for every non-empty page. The tab is closed one
   * second after the last page.
   */
  @withErrorHandling
  public async wanderReviewPage(asin: string) {
    const url = new URL(
      `https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`,
    );
    const tab = await this.createNewTab(url.toString());
    const injector = new AmazonReviewPageInjector(tab);
    await injector.waitForPageLoad();
    for (let star = 1; star <= 5; star++) {
      await injector.showStarsDropDownMenu();
      await injector.selectStar(star);
      while (true) {
        await injector.waitForPageLoad();
        const reviews = await injector.getSinglePageReviews();
        if (reviews.length > 0) {
          this.channel.emit('item-review-collected', { asin, reviews });
        }
        const hasNextPage = await injector.jumpToNextPageIfExist();
        if (!hasNextPage) {
          break;
        }
      }
    }
    setTimeout(() => browser.tabs.remove(tab.id!), 1000);
  }

  /**
   * Shared task loop: visits `items` one at a time until the list is empty or
   * an `interrupt` arrives on the control channel. The interrupt listener is
   * always removed, even when a visit throws.
   *
   * @param items Work list; it is copied, the caller's array is not mutated.
   * @param visit Handles one item.
   * @param progress Called after each item with the items still pending.
   */
  private async runInterruptibly(
    items: string[],
    visit: (item: string) => Promise<unknown>,
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    const remains = [...items];
    let interrupt = false;
    const unsubscribe = this._controlChannel.on('interrupt', () => {
      interrupt = true;
    });
    try {
      while (remains.length > 0 && !interrupt) {
        const item = remains.shift()!;
        await visit(item);
        // Fire-and-forget: the loop does not wait for the progress callback.
        progress?.(remains);
      }
    } finally {
      unsubscribe();
    }
  }

  /** Searches each keyword and walks all of its result pages. */
  public async runSearchPageTask(
    keywordsList: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runInterruptibly(
      keywordsList,
      async (kw) => {
        await this.doSearch(kw);
        await this.wanderSearchPage();
      },
      progress,
    );
  }

  /** Visits the detail page of each ASIN (or product URL) in `asins`. */
  public async runDetaiPageTask(
    asins: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runInterruptibly(asins, (asin) => this.wanderDetailPage(asin), progress);
  }

  /** Collects the reviews of each ASIN in `asins`. */
  public async runReviewPageTask(
    asins: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runInterruptibly(asins, (asin) => this.wanderReviewPage(asin), progress);
  }

  /**
   * Asks the running task loop to stop; the item currently being processed is
   * finished first because the flag is only checked between items.
   */
  public stop(): Promise<void> {
    return this._controlChannel.emit('interrupt');
  }
}
/** Module facade exposing the shared Amazon page worker. */
const amazonWorkerFacade = {
  /** Returns the singleton Amazon page worker. */
  useAmazonPageWorker: (): AmazonPageWorker => AmazonPageWorkerImpl.getInstance(),
};

export default amazonWorkerFacade;

View File

@ -0,0 +1,75 @@
import Emittery from 'emittery';
import { HomedepotEvents, HomedepotWorker } from './types';
import { Tabs } from 'webextension-polyfill';
import { isForbiddenUrl } from '~/env';
import { withErrorHandling } from '../error-handler';
import { HomedepotDetailPageInjector } from '../web-injectors';
/**
 * Singleton worker that opens Home Depot product pages one by one and
 * publishes the scraped details on {@link channel}.
 */
class HomedepotWorkerImpl implements HomedepotWorker {
  private static _instance: HomedepotWorker | null = null;

  /** Returns the shared instance, creating it on first use. */
  public static getInstance(): HomedepotWorker {
    if (HomedepotWorkerImpl._instance === null) {
      HomedepotWorkerImpl._instance = new HomedepotWorkerImpl();
    }
    return HomedepotWorkerImpl._instance;
  }

  private constructor() {}

  /** Channel on which collected items and errors are emitted. */
  readonly channel: Emittery<HomedepotEvents> = new Emittery();

  /** Internal channel; {@link stop} emits `interrupt` here to end the task loop. */
  private readonly _controlChannel = new Emittery<{ interrupt: undefined }>();

  /** Resolves to the first tab returned for "active tab of the current window". */
  private async getCurrentTab(): Promise<Tabs.Tab> {
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    return tab;
  }

  /** Opens `url` in a new, focused tab. */
  private async createNewTab(url?: string): Promise<Tabs.Tab> {
    return browser.tabs.create({ url, active: true });
  }

  /**
   * Loads `https://www.homedepot.com/p/<OSMID>` (reusing the current tab
   * unless it has no URL or a forbidden one), waits for the page and emits
   * `detail-item-collected` with the scraped info.
   */
  @withErrorHandling
  private async wanderingDetailPage(OSMID: string) {
    const url = `https://www.homedepot.com/p/${OSMID}`;
    let tab = await this.getCurrentTab();
    if (!tab.url || isForbiddenUrl(tab.url)) {
      tab = await this.createNewTab(url);
    } else {
      // Keep the tab object returned by the update so it reflects the navigation.
      tab = await browser.tabs.update(tab.id!, { url });
    }
    const injector = new HomedepotDetailPageInjector(tab);
    await injector.waitForPageLoad();
    const info = await injector.getInfo();
    this.channel.emit('detail-item-collected', { item: { OSMID, ...info } });
  }

  /**
   * Visits the detail page of every id in `OSMIDs`, in order, until the list
   * is exhausted or {@link stop} is called.
   *
   * @param OSMIDs Product ids; the array is copied and not mutated.
   * @param progress Called after each id with the ids still pending.
   */
  async runDetailPageTask(
    OSMIDs: string[],
    progress?: (remains: string[]) => Promise<void> | void,
  ): Promise<void> {
    const remains = [...OSMIDs];
    let interrupt = false;
    const unsubscribe = this._controlChannel.on('interrupt', () => {
      interrupt = true;
    });
    try {
      while (remains.length > 0 && !interrupt) {
        const osmid = remains.shift()!;
        await this.wanderingDetailPage(osmid);
        // Fire-and-forget: the loop does not wait for the progress callback.
        progress?.(remains);
      }
    } finally {
      // Always drop the listener so a failed run does not leave it registered.
      unsubscribe();
    }
  }

  /**
   * Asks the running task loop to stop; the id currently being processed is
   * finished first because the flag is only checked between ids.
   */
  stop(): Promise<void> {
    return this._controlChannel.emit('interrupt');
  }
}
/** Module facade exposing the shared Home Depot worker. */
const homedepotWorkerFacade = {
  /** Returns the singleton Home Depot worker. */
  useHomedepotWorker: () => HomedepotWorkerImpl.getInstance(),
};

export default homedepotWorkerFacade;

View File

@ -1,285 +1,4 @@
import Emittery from 'emittery';
import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types';
import type { Tabs } from 'webextension-polyfill';
import { withErrorHandling } from '../error-handler';
import {
AmazonDetailPageInjector,
AmazonReviewPageInjector,
AmazonSearchPageInjector,
} from '../web-injectors';
import amazon from './amazon';
import homedepot from './homedepot';
/**
 * Singleton worker that drives a browser tab through Amazon search, detail and
 * review pages and publishes whatever it collects on {@link channel}.
 *
 * AmazonPageWorkerImpl can run on background & sidepanel & popup,
 * **can't** run on content script!
 */
class AmazonPageWorkerImpl implements AmazonPageWorker {
  //#region Singleton
  private static _instance: AmazonPageWorker | null = null;

  /** Returns the shared instance, creating it on first use. */
  public static getInstance() {
    if (this._instance === null) {
      this._instance = new AmazonPageWorkerImpl();
    }
    return this._instance;
  }
  //#endregion

  private constructor() {}

  /** Internal channel; {@link stop} emits `interrupt` here to end a running task loop. */
  private _controlChannel = new Emittery<{ interrupt: undefined }>();

  /** Channel on which collected data and errors are emitted. */
  public readonly channel = new Emittery<AmazonPageWorkerEvents>();

  /** Resolves to the first tab returned for "active tab of the current window". */
  private async getCurrentTab(): Promise<Tabs.Tab> {
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    return tab;
  }

  /** Opens `url` in a new, focused tab. */
  private async createNewTab(url: string): Promise<Tabs.Tab> {
    return browser.tabs.create({ url, active: true });
  }

  /**
   * Scrapes one search-result page in `tab`.
   *
   * @returns The page's items, its page number and whether a next page exists.
   * @throws Error when the page data is `null` or the next-page probe does not
   * return a boolean; an `error` event is emitted first.
   */
  private async wanderSearchSinglePage(tab: Tabs.Tab) {
    const injector = new AmazonSearchPageInjector(tab);
    // #region Wait for the Next button to appear, indicating that the product items have finished loading
    await injector.waitForPageLoaded();
    // #endregion
    // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1
    const pagePattern = await injector.getPagePattern();
    // #endregion
    // #region Retrieve key nodes and their information from the critical product search page
    const data = await injector.getPageData(pagePattern);
    // #endregion
    // #region get current page
    const page = await injector.getCurrentPage();
    // #endregion
    // #region Determine if it is the last page, otherwise navigate to the next page
    const hasNextPage = await injector.determineHasNextPage();
    // #endregion
    await new Promise((resolve) => setTimeout(resolve, 1000));
    if (data === null || typeof hasNextPage !== 'boolean') {
      this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url });
      throw new Error('爬取单页信息失败');
    }
    return { data, hasNextPage, page };
  }

  /**
   * Points the current tab (or a fresh one when the current tab is not an
   * http(s) page) at the Amazon search results for `keywords`.
   *
   * @returns The search URL that was requested.
   */
  @withErrorHandling
  public async doSearch(keywords: string): Promise<string> {
    const url = new URL('https://www.amazon.com/s');
    url.searchParams.append('k', keywords);
    let tab = await this.getCurrentTab();
    if (!tab.url?.startsWith('http')) {
      tab = await this.createNewTab('https://www.amazon.com/');
      // Pin the URL on the local tab object so the comparison below has a value to parse.
      tab.url = 'https://www.amazon.com/';
    }
    const currentUrl = new URL(tab.url!);
    if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) {
      tab = await browser.tabs.update(tab.id, { url: url.toString(), active: true });
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
    }
    return url.toString();
  }

  /**
   * Walks every result page of the search shown in the current tab and emits
   * one `item-links-collected` event per page. `rank` is the 1-based position
   * of an item across all pages visited so far.
   */
  @withErrorHandling
  public async wanderSearchPage(): Promise<void> {
    const tab = await this.getCurrentTab();
    let offset = 0;
    while (true) {
      const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab);
      const keywords = new URL(tab.url!).searchParams.get('k')!;
      const objs = data.map((r, i) => ({
        ...r,
        keywords,
        page,
        rank: offset + 1 + i,
        createTime: new Date().toLocaleString(),
        asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0],
      }));
      this.channel.emit('item-links-collected', { objs });
      offset += data.length;
      if (!hasNextPage) {
        break;
      }
    }
    return new Promise((resolve) => setTimeout(resolve, 1000));
  }

  /**
   * Opens one product detail page and emits its base info, rating, category
   * ranks, image URLs and top reviews as separate events.
   *
   * @param entry Either a `www.amazon.com` product URL containing `/dp/<ASIN>`
   * or a bare 10-character ASIN.
   */
  @withErrorHandling
  public async wanderDetailPage(entry: string) {
    //#region Initial Meta Info
    const params = { asin: '', url: '' };
    if (entry.match(/^https?:\/\/www\.amazon\.com.*\/dp\/[A-Z0-9]{10}/)) {
      // Look-behind keeps only the 10-character ASIN that follows "/dp/";
      // the guard above guarantees a match.
      const [asin] = /(?<=\/dp\/)[A-Z0-9]{10}/.exec(entry)!;
      params.asin = asin;
      params.url = entry;
    } else if (entry.match(/^[A-Z0-9]{10}$/)) {
      params.asin = entry;
      params.url = `https://www.amazon.com/dp/${entry}`;
    }
    let tab = await this.getCurrentTab();
    if (!tab.url || !tab.url.startsWith('http')) {
      tab = await this.createNewTab(params.url);
    } else {
      tab = await browser.tabs.update(tab.id, {
        url: params.url,
      });
    }
    const injector = new AmazonDetailPageInjector(tab);
    //#endregion
    //#region Await Production Introduction Element Loaded
    await injector.waitForPageLoaded();
    await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds.
    //#endregion
    //#region Fetch Base Info
    const baseInfo = await injector.getBaseInfo();
    this.channel.emit('item-base-info-collected', {
      asin: params.asin,
      title: baseInfo.title,
      price: baseInfo.price,
    });
    //#endregion
    //#region Fetch Rating Info
    const ratingInfo = await injector.getRatingInfo();
    if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) {
      this.channel.emit('item-rating-collected', {
        asin: params.asin,
        ...ratingInfo,
      });
    }
    //#endregion
    //#region Fetch Category Rank Info
    let rawRankingText: string | null = await injector.getRankText();
    if (rawRankingText) {
      const info: Pick<AmazonDetailItem, 'category1' | 'category2'> = {};
      // One "#<rank> in <category>" statement; applied twice below.
      const rankStatement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/;
      let statement = rankStatement.exec(rawRankingText)?.[0];
      if (statement) {
        const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/\s\(See\sTop.+\)/, '');
        const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
        if (name && !Number.isNaN(rank)) {
          info['category1'] = { name, rank };
        }
        // Remove the first statement so the second search finds the next one.
        rawRankingText = rawRankingText.replace(statement, '');
      }
      statement = rankStatement.exec(rawRankingText)?.[0];
      if (statement) {
        const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/[\s]+$/, '');
        const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
        if (name && !Number.isNaN(rank)) {
          info['category2'] = { name, rank };
        }
      }
      this.channel.emit('item-category-rank-collected', {
        asin: params.asin,
        ...info,
      });
    }
    //#endregion
    //#region Fetch Goods' Images
    const imageUrls = await injector.getImageUrls();
    if (imageUrls.length > 0) {
      this.channel.emit('item-images-collected', {
        asin: params.asin,
        imageUrls: Array.from(new Set(imageUrls)),
      });
    }
    await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds.
    //#endregion
    //#region Fetch Top Reviews
    const reviews = await injector.getTopReviews();
    if (reviews.length > 0) {
      this.channel.emit('item-top-reviews-collected', {
        asin: params.asin,
        topReviews: reviews,
      });
    }
    //#endregion
  }

  /**
   * Opens the all-reviews page for `asin` in a new tab, iterates the star
   * filter from 1 to 5 and pages through each filter, emitting
   * `item-review-collected` for every non-empty page. The tab is closed one
   * second after the last page.
   */
  @withErrorHandling
  public async wanderReviewPage(asin: string) {
    const url = new URL(
      `https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`,
    );
    const tab = await this.createNewTab(url.toString());
    const injector = new AmazonReviewPageInjector(tab);
    await injector.waitForPageLoad();
    for (let star = 1; star <= 5; star++) {
      await injector.showStarsDropDownMenu();
      await injector.selectStar(star);
      while (true) {
        await injector.waitForPageLoad();
        const reviews = await injector.getSinglePageReviews();
        if (reviews.length > 0) {
          this.channel.emit('item-review-collected', { asin, reviews });
        }
        const hasNextPage = await injector.jumpToNextPageIfExist();
        if (!hasNextPage) {
          break;
        }
      }
    }
    setTimeout(() => browser.tabs.remove(tab.id!), 1000);
  }

  /**
   * Shared task loop: visits `items` one at a time until the list is empty or
   * an `interrupt` arrives on the control channel. The interrupt listener is
   * always removed, even when a visit throws.
   *
   * @param items Work list; it is copied, the caller's array is not mutated.
   * @param visit Handles one item.
   * @param progress Called after each item with the items still pending.
   */
  private async runInterruptibly(
    items: string[],
    visit: (item: string) => Promise<unknown>,
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    const remains = [...items];
    let interrupt = false;
    const unsubscribe = this._controlChannel.on('interrupt', () => {
      interrupt = true;
    });
    try {
      while (remains.length > 0 && !interrupt) {
        const item = remains.shift()!;
        await visit(item);
        // Fire-and-forget: the loop does not wait for the progress callback.
        progress?.(remains);
      }
    } finally {
      unsubscribe();
    }
  }

  /** Searches each keyword and walks all of its result pages. */
  public async runSearchPageTask(
    keywordsList: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runInterruptibly(
      keywordsList,
      async (kw) => {
        await this.doSearch(kw);
        await this.wanderSearchPage();
      },
      progress,
    );
  }

  /** Visits the detail page of each ASIN (or product URL) in `asins`. */
  public async runDetaiPageTask(
    asins: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runInterruptibly(asins, (asin) => this.wanderDetailPage(asin), progress);
  }

  /**
   * Collects the reviews of each ASIN in `asins`. Like the other tasks, the
   * loop ends early when {@link stop} is called.
   */
  public async runReviewPageTask(
    asins: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runInterruptibly(asins, (asin) => this.wanderReviewPage(asin), progress);
  }

  /**
   * Asks the running task loop to stop and resolves once the interrupt has
   * been delivered to its listeners. The item currently being processed is
   * finished first because the flag is only checked between items.
   */
  public async stop(): Promise<void> {
    await this._controlChannel.emit('interrupt');
  }
}
/** Entry point that hands out the page worker singletons. */
class PageWorker {
  /** Returns the shared Amazon page worker. */
  public useAmazonPageWorker(): AmazonPageWorker {
    const worker = AmazonPageWorkerImpl.getInstance();
    return worker;
  }
}

// Single module-level instance, exported as the default.
const pageWorker = new PageWorker();

export default pageWorker;
export { amazon, homedepot };

View File

@ -39,48 +39,64 @@ type AmazonItem = Pick<AmazonSearchItem, 'asin'> &
Partial<AmazonSearchItem> &
Partial<AmazonDetailItem> & { hasDetail: boolean };
/**
 * One product scraped from a Home Depot detail page.
 */
type HomedepotDetailItem = {
/** Product id; the worker builds the page URL as `https://www.homedepot.com/p/<OSMID>`. */
OSMID: string;
/** Address of the page the data was read from. */
link: string;
/** Brand text shown on the page. */
brandName: string;
/** Product title text. */
title: string;
/** Price text as displayed (kept as a string, not parsed). */
price: string;
/** Rating as text, e.g. a `d.d` figure. */
rate: string;
// NOTE(review): not populated by the detail-page injector visible in this change — confirm where it is set.
innerText: string;
/** Number of reviews. */
reviewCount: number;
/** `src` of the main gallery image. */
mainImageUrl: string;
};
/**
 * Events emitted on the Amazon page worker's channel, keyed by event name with
 * the payload type as value.
 */
interface AmazonPageWorkerEvents {
/**
 * The event is fired when worker collected links to items on the Amazon search page.
 */
['item-links-collected']: { objs: AmazonSearchItem[] };
/**
 * The event is fired when worker collected goods' base info on the Amazon detail page.
 */
['item-base-info-collected']: Pick<AmazonDetailItem, 'asin' | 'title' | 'price'>;
/**
 * The event is fired when worker collected goods' rating on the Amazon detail page.
 */
['item-rating-collected']: Pick<AmazonDetailItem, 'asin' | 'rating' | 'ratingCount'>;
/**
 * The event is fired when worker collected goods' category rank info
 * (up to two categories) on the Amazon detail page.
 */
['item-category-rank-collected']: Pick<AmazonDetailItem, 'asin' | 'category1' | 'category2'>;
/**
 * The event is fired when images collected
 */
['item-images-collected']: Pick<AmazonDetailItem, 'asin' | 'imageUrls'>;
/**
 * The event is fired when top reviews collected in detail page
 */
['item-top-reviews-collected']: Pick<AmazonDetailItem, 'asin' | 'topReviews'>;
/**
 * The event is fired when reviews collected in all review page
 */
['item-review-collected']: { asin: string; reviews: AmazonReview[] };
/**
 * Error event that occurs when there is an issue with the Amazon page worker
 */
['error']: { message: string; url?: string };
}
/**
 * Events emitted on the Home Depot worker's channel, keyed by event name with
 * the payload type as value.
 */
interface HomedepotEvents {
/**
 * The event is fired when a detail item has been collected from a product page.
 */
['detail-item-collected']: { item: HomedepotDetailItem };
/**
 * The event is fired when an error occurs; `url` is optional.
 */
['error']: { message: string; url?: string };
}
interface AmazonPageWorker {
/**
* The channel for communication with the Amazon page worker.
@ -120,3 +136,23 @@ interface AmazonPageWorker {
*/
stop(): Promise<void>;
}
/**
 * Contract of the worker that collects Home Depot product details.
 */
interface HomedepotWorker {
/**
 * The channel for communication with the Homedepot page worker.
 */
readonly channel: Emittery<HomedepotEvents>;
/**
 * Browses the goods detail page of each id and collects the target information.
 *
 * @param OSMIDs Product ids to visit.
 * @param progress Optional callback that receives the ids still pending.
 */
runDetailPageTask(
OSMIDs: string[],
progress?: (remains: string[]) => Promise<void> | void,
): Promise<void>;
/**
 * Stop the worker.
 */
stop(): Promise<void>;
}

View File

@ -1,6 +1,6 @@
import { exec } from './execute-script';
import type { Tabs } from 'webextension-polyfill';
import type { AmazonReview, AmazonSearchItem } from './page-worker/types';
import type { AmazonReview, AmazonSearchItem, HomedepotDetailItem } from './page-worker/types';
class BaseInjector {
readonly _tab: Tabs.Tab;
@ -235,40 +235,24 @@ export class AmazonDetailPageInjector extends BaseInjector {
/**
 * Collects image URLs for the product on the detail page and returns them as
 * an array of strings. The callback is handed to `this.run`.
 * NOTE(review): presumably `run` executes the callback inside the tab's page — confirm.
 */
public async getImageUrls() {
return this.run(async () => {
// Start from the `src` of every thumbnail image.
let urls = Array.from(document.querySelectorAll<HTMLImageElement>('.imageThumbnail img')).map(
(e) => e.src,
);
//#region process more images https://github.com/primedigitaltech/azon_seeker/issues/4
const overlay = document.querySelector<HTMLDivElement>('.overlayRestOfImages');
if (overlay) {
// `#ivThumbs` without client rects is not rendered yet: open the overlay and give it a second.
if (document.querySelector<HTMLDivElement>('#ivThumbs')!.getClientRects().length === 0) {
overlay.click();
await new Promise((resolve) => setTimeout(resolve, 1000));
}
// Replace the list with the inline `background` style of every thumb in the viewer.
urls = Array.from(
document.querySelectorAll<HTMLDivElement>('#ivThumbs .ivThumbImage[style]'),
).map((e) => e.style.background);
// Keep only the text between `url("` and the closing quote.
urls = urls.map((s) => {
const [url] = /(?<=url\(").+(?=")/.exec(s)!;
return url;
});
await new Promise((resolve) => setTimeout(resolve, 1000));
// Close the popover again, if its close button is present.
document
.querySelector<HTMLButtonElement>(".a-popover button[data-action='a-popover-close']")
?.click();
}
//#endregion
//#region post-process image urls
// Reduce the file name to `<first chunk>.<last chunk>`, dropping any dot-separated parts in between.
urls = urls.map((rawUrl) => {
const imgUrl = new URL(rawUrl);
const paths = imgUrl.pathname.split('/');
const chunks = paths[paths.length - 1].split('.');
const [name, ext] = [chunks[0], chunks[chunks.length - 1]];
paths[paths.length - 1] = `${name}.${ext}`;
imgUrl.pathname = paths.join('/');
return imgUrl.toString();
});
//#endregion
// Read the text of the inline <script> whose content starts with a newline followed by `P.when('A').register`.
const script = document.evaluate(
`//script[starts-with(text(), "\nP.when(\'A\').register")]`,
document,
null,
XPathResult.STRING_TYPE,
).stringValue;
// Pull every `"hiRes":"…"` image URL out of that script text.
const urls = [
...script.matchAll(
/(?<="hiRes":")https:\/\/m.media-amazon.com\/images\/I\/[\w\d\.\-+]+(?=")/g,
),
].map((e) => e[0]);
return urls;
});
}
@ -372,8 +356,6 @@ export class AmazonReviewPageInjector extends BaseInjector {
);
const items: AmazonReview[] = [];
for (let i = 0; i < xResult.snapshotLength; i++) {
console.log('handling', i);
const commentNode = xResult.snapshotItem(i) as HTMLDivElement;
if (!commentNode) {
continue;
@ -459,3 +441,64 @@ export class AmazonReviewPageInjector extends BaseInjector {
);
}
}
/**
 * Injector for Home Depot product detail pages: waits until the page is ready
 * and reads the product information from the DOM. Both methods hand a
 * self-contained callback to `this.run`.
 */
export class HomedepotDetailPageInjector extends BaseInjector {
  /**
   * Polls the page until the document is fully loaded and the review
   * placeholder is gone. While waiting it expands the collapsed overview
   * section and scrolls the placeholder into view; when done it scrolls the
   * `#product-section-rr` element to the centre.
   */
  public waitForPageLoad() {
    return this.run(async () => {
      while (true) {
        document
          .querySelector<HTMLElement>(
            `#product-section-overview div[role='button'][aria-expanded='false']`,
          )
          ?.click();
        const reviewPlaceholderEl = document.querySelector(
          `[data-component^="ratings-and-reviews"] [class^="placeholder"]`,
        );
        reviewPlaceholderEl?.scrollIntoView({ behavior: 'smooth' });
        if (document.readyState === 'complete' && !reviewPlaceholderEl) {
          await new Promise((resolve) => setTimeout(resolve, 1000));
          document
            .querySelector(`#product-section-rr`)
            ?.scrollIntoView({ behavior: 'smooth', block: 'center' });
          break;
        }
        // Yield between polls: a loop that never awaits would block the
        // page's event loop, so the document could never finish loading and
        // the exit condition could never become true.
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    });
  }

  /**
   * Reads link, brand, title, price, rating, review count and main image URL
   * from the current page.
   *
   * @returns The collected fields, asserted as `Omit<HomedepotDetailItem, 'OSMID'>`.
   * @throws TypeError when one of the queried elements is missing or a
   * pattern does not match (the non-null assertions are not checked at runtime).
   */
  public getInfo() {
    return this.run(async () => {
      const link = document.location.toString();
      const brandName = document.querySelector<HTMLDivElement>(
        `[data-component^="product-details:ProductDetailsBrandCollection"]`,
      )!.innerText;
      const title = document.querySelector<HTMLDivElement>(
        `[data-component^="product-details:ProductDetailsTitle"]`,
      )!.innerText;
      const price = document.querySelector<HTMLDivElement>(`#standard-price`)!.innerText;
      // First "d.d" figure in the rating element's text.
      const rateText = document.querySelector<HTMLDivElement>(
        `[data-component^="ratings-and-reviews"] .sui-mr-1`,
      )!.innerText;
      const rate = /\d\.\d/.exec(rateText)![0];
      // First run of digits in the review-count element's text.
      const reviewCountText = document.querySelector<HTMLDivElement>(
        `[data-component^="ratings-and-reviews"] button > span:last-child`,
      )!.innerText;
      const reviewCount = Number(/[\d]+/.exec(reviewCountText)![0]);
      const mainImageUrl = document.querySelector<HTMLImageElement>(
        `.mediagallery__mainimage img`,
      )!.src;
      // NOTE(review): the cast hides any HomedepotDetailItem field that is not
      // populated here — confirm the object matches the type.
      return {
        link,
        brandName,
        title,
        price,
        rate,
        reviewCount,
        mainImageUrl,
      } as Omit<HomedepotDetailItem, 'OSMID'>;
    });
  }
}

View File

@ -1,6 +1,6 @@
<script setup lang="ts">
import { useLongTask } from '~/composables/useLongTask';
import pageWorker from '~/logic/page-worker';
import { amazon as pageWorker } from '~/logic/page-worker';
import { AmazonDetailItem } from '~/logic/page-worker/types';
import { detailAsinInput, detailItems } from '~/logic/storage';

View File

@ -1,6 +1,6 @@
<script lang="ts" setup>
import { useLongTask } from '~/composables/useLongTask';
import pageWorker from '~/logic/page-worker';
import { amazon as pageWorker } from '~/logic/page-worker';
import type { AmazonReview } from '~/logic/page-worker/types';
import { reviewAsinInput, reviewItems } from '~/logic/storage';

View File

@ -1,6 +1,6 @@
<script setup lang="ts">
import { keywordsList } from '~/logic/storage';
import pageWorker from '~/logic/page-worker';
import { amazon as pageWorker } from '~/logic/page-worker';
import { NButton } from 'naive-ui';
import { searchItems } from '~/logic/storage';
import { useLongTask } from '~/composables/useLongTask';