mirror of
https://github.com/primedigitaltech/azon_seeker.git
synced 2026-01-19 13:13:22 +08:00
Update
This commit is contained in:
parent
74315474f8
commit
a240242331
21
src/composables/useCurrentUrl.ts
Normal file
21
src/composables/useCurrentUrl.ts
Normal file
@ -0,0 +1,21 @@
|
||||
/**
 * Composable that tracks the URL of the active tab in the current window.
 *
 * The URL is read once when the component mounts and re-read whenever
 * `browser.tabs.onUpdated` or `browser.tabs.onHighlighted` fires; both
 * listeners are removed again on unmount.
 *
 * @returns `currentUrl`, a ref holding the active tab's URL, or `''` when the
 *          query returns no tab or the tab has no URL.
 */
export function useCurrentUrl() {
  const currentUrl = ref('');

  const updateUrl = async () => {
    // The query result can be empty, so the first element may be undefined.
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    currentUrl.value = tab?.url ?? '';
  };

  onMounted(() => {
    void updateUrl();
    browser.tabs.onUpdated.addListener(updateUrl);
    browser.tabs.onHighlighted.addListener(updateUrl);
  });

  onUnmounted(() => {
    browser.tabs.onUpdated.removeListener(updateUrl);
    browser.tabs.onHighlighted.removeListener(updateUrl);
  });

  return { currentUrl };
}
|
||||
@ -1,4 +1,4 @@
|
||||
/**
 * Derives which extension page is running from the document path.
 *
 * Takes the third `/`-separated segment of `document.location.pathname`
 * (index 2 after splitting) and treats it as the page name.
 *
 * @returns `pageContext`, typed as `'sidepanel' | 'options'`. The value is
 *          cast, not validated, so any other path segment is passed through.
 */
export function usePageContext() {
  const pageContext = document.location.pathname.split('/')[2] as 'sidepanel' | 'options';
  return { pageContext };
}
|
||||
|
||||
284
src/logic/page-worker/amazon.ts
Normal file
284
src/logic/page-worker/amazon.ts
Normal file
@ -0,0 +1,284 @@
|
||||
import Emittery from 'emittery';
|
||||
import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types';
|
||||
import type { Tabs } from 'webextension-polyfill';
|
||||
import { withErrorHandling } from '../error-handler';
|
||||
import {
|
||||
AmazonDetailPageInjector,
|
||||
AmazonReviewPageInjector,
|
||||
AmazonSearchPageInjector,
|
||||
} from '../web-injectors';
|
||||
import { isForbiddenUrl } from '~/env';
|
||||
|
||||
/**
 * AmazonPageWorkerImpl can run on background & sidepanel & popup,
 * **can't** run on content script!
 *
 * Singleton that drives a browser tab through Amazon search, detail and
 * review pages and publishes everything it collects on `channel`.
 */
class AmazonPageWorkerImpl implements AmazonPageWorker {
  //#region Singleton
  private static _instance: AmazonPageWorker | null = null;

  /** Returns the shared worker instance, creating it on first use. */
  public static getInstance() {
    if (this._instance === null) {
      this._instance = new AmazonPageWorkerImpl();
    }
    return this._instance;
  }
  //#endregion

  private constructor() {}

  /** Internal channel; `stop()` emits `interrupt` on it to end a running task loop. */
  private readonly _controlChannel = new Emittery<{ interrupt: undefined }>();

  /** Public channel on which collected data and errors are emitted. */
  public readonly channel = new Emittery<AmazonPageWorkerEvents>();

  /**
   * Returns the active tab of the current window, or `undefined` when the
   * query yields no tab.
   */
  private async getCurrentTab(): Promise<Tabs.Tab | undefined> {
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    return tab;
  }

  /** Opens `url` in a new, focused tab. */
  private async createNewTab(url: string): Promise<Tabs.Tab> {
    return browser.tabs.create({ url, active: true });
  }

  /**
   * Collects one page of search results from `tab`.
   *
   * @returns the page's items, its page number and whether a next page exists.
   * @throws Error (after emitting an `error` event) when the page data is
   *         `null` or the next-page check did not yield a boolean.
   */
  private async wanderSearchSinglePage(tab: Tabs.Tab) {
    const injector = new AmazonSearchPageInjector(tab);
    // #region Wait for the Next button to appear, indicating that the product items have finished loading
    await injector.waitForPageLoaded();
    // #endregion
    // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1
    const pagePattern = await injector.getPagePattern();
    // #endregion
    // #region Retrieve key nodes and their information from the critical product search page
    const data = await injector.getPageData(pagePattern);
    // #endregion
    // #region get current page
    const page = await injector.getCurrentPage();
    // #endregion
    // #region Determine if it is the last page, otherwise navigate to the next page
    const hasNextPage = await injector.determineHasNextPage();
    // #endregion
    await new Promise((resolve) => setTimeout(resolve, 1000));
    if (data === null || typeof hasNextPage !== 'boolean') {
      this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url });
      throw new Error('爬取单页信息失败');
    }
    return { data, hasNextPage, page };
  }

  /**
   * Navigates to the Amazon search results for `keywords`.
   *
   * Reuses the active tab when it shows an http(s) page; otherwise opens a new
   * tab on the Amazon home page first. The tab is only navigated when it is
   * not already showing the results for these keywords.
   *
   * @returns the search URL.
   */
  @withErrorHandling
  public async doSearch(keywords: string): Promise<string> {
    const url = new URL('https://www.amazon.com/s');
    url.searchParams.append('k', keywords);

    let tab = await this.getCurrentTab();
    let currentHref = tab?.url;
    if (!tab || !currentHref?.startsWith('http')) {
      // No usable tab (none active, or a non-http page): start from a fresh Amazon tab.
      currentHref = 'https://www.amazon.com/';
      tab = await this.createNewTab(currentHref);
    }

    const currentUrl = new URL(currentHref);
    if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) {
      await browser.tabs.update(tab.id, { url: url.toString(), active: true });
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
    }
    return url.toString();
  }

  /**
   * Walks every result page of the search shown in the active tab and emits an
   * `item-links-collected` event per page. `rank` is the 1-based position of
   * an item across all pages visited so far.
   *
   * @throws Error when there is no active tab with a URL.
   */
  @withErrorHandling
  public async wanderSearchPage(): Promise<void> {
    const tab = await this.getCurrentTab();
    if (!tab?.url) {
      throw new Error('No active tab with a URL to collect search results from');
    }
    // The tab object is a snapshot, so its URL (and the keywords) cannot change between pages.
    const keywords = new URL(tab.url).searchParams.get('k')!;
    let offset = 0;
    while (true) {
      const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab);
      const objs = data.map((r, i) => ({
        ...r,
        keywords,
        page,
        rank: offset + 1 + i,
        createTime: new Date().toLocaleString(),
        asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0],
      }));
      this.channel.emit('item-links-collected', { objs });
      offset += data.length;
      if (!hasNextPage) {
        break;
      }
    }
    return new Promise((resolve) => setTimeout(resolve, 1000));
  }

  /**
   * Extracts `{ name, rank }` from one "#<rank> in <category>" statement.
   *
   * @param statement the matched rank statement.
   * @param trailing pattern stripped from the category name.
   * @returns the parsed pair, or `undefined` when the name is empty or the rank is not numeric.
   */
  private parseRankStatement(statement: string, trailing: RegExp) {
    const name = /(?<=in\s).+/.exec(statement)?.[0].replace(trailing, '');
    const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
    return name && !Number.isNaN(rank) ? { name, rank } : undefined;
  }

  /**
   * Opens a product detail page and emits base info, rating, category rank and
   * image events for it.
   *
   * @param entry either a 10-character ASIN or an `amazon.com` detail URL
   *              containing `/dp/<ASIN>`.
   * @throws Error when `entry` is neither of the two accepted forms.
   */
  @withErrorHandling
  public async wanderDetailPage(entry: string) {
    //#region Initial Meta Info
    const params = { asin: '', url: '' };
    const urlMatch = /^https?:\/\/www\.amazon\.com.*\/dp\/([A-Z0-9]{10})/.exec(entry);
    if (urlMatch) {
      // Capture only the ASIN itself, not the "/dp/" prefix.
      params.asin = urlMatch[1];
      params.url = entry;
    } else if (/^[A-Z0-9]{10}$/.test(entry)) {
      params.asin = entry;
      params.url = `https://www.amazon.com/dp/${entry}`;
    } else {
      // Without this guard the tab would be navigated to an empty URL.
      throw new Error(`Invalid ASIN or Amazon detail page URL: ${entry}`);
    }
    const current = await this.getCurrentTab();
    const tab =
      !current?.url || isForbiddenUrl(current.url)
        ? await this.createNewTab(params.url)
        : await browser.tabs.update(current.id, { url: params.url });
    const injector = new AmazonDetailPageInjector(tab);
    //#endregion
    //#region Await Production Introduction Element Loaded
    await injector.waitForPageLoaded();
    await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds.
    //#endregion
    //#region Fetch Base Info
    const baseInfo = await injector.getBaseInfo();
    this.channel.emit('item-base-info-collected', {
      asin: params.asin,
      title: baseInfo.title,
      price: baseInfo.price,
    });
    //#endregion
    //#region Fetch Rating Info
    const ratingInfo = await injector.getRatingInfo();
    if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) {
      this.channel.emit('item-rating-collected', {
        asin: params.asin,
        ...ratingInfo,
      });
    }
    //#endregion
    //#region Fetch Category Rank Info
    let rawRankingText: string | null = await injector.getRankText();
    if (rawRankingText) {
      const statementPattern = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/;
      const info: Pick<AmazonDetailItem, 'category1' | 'category2'> = {};
      const first = statementPattern.exec(rawRankingText)?.[0];
      if (first) {
        const parsed = this.parseRankStatement(first, /\s\(See\sTop.+\)/);
        if (parsed) {
          info.category1 = parsed;
        }
        // Drop the first statement so the same pattern finds the second one.
        rawRankingText = rawRankingText.replace(first, '');
      }
      const second = statementPattern.exec(rawRankingText)?.[0];
      if (second) {
        const parsed = this.parseRankStatement(second, /[\s]+$/);
        if (parsed) {
          info.category2 = parsed;
        }
      }
      this.channel.emit('item-category-rank-collected', {
        asin: params.asin,
        ...info,
      });
    }
    //#endregion
    //#region Fetch Goods' Images
    const imageUrls = await injector.getImageUrls();
    if (imageUrls.length > 0) {
      this.channel.emit('item-images-collected', {
        asin: params.asin,
        imageUrls: Array.from(new Set(imageUrls)),
      });
    }
    await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds.
    //#endregion
    //#region Fetch Top Reviews
    // const reviews = await injector.getTopReviews();
    // reviews.length > 0 &&
    // this.channel.emit('item-top-reviews-collected', {
    // asin: params.asin,
    // topReviews: reviews,
    // });
    //#endregion
  }

  /**
   * Opens the "all reviews" page of `asin` in a new tab, iterates the 1-5 star
   * filters and every page under each, and emits `item-review-collected` for
   * each non-empty page. The tab is closed one second after the walk ends,
   * whether it finished or failed.
   */
  @withErrorHandling
  public async wanderReviewPage(asin: string) {
    const url = new URL(
      `https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`,
    );
    const tab = await this.createNewTab(url.toString());
    try {
      const injector = new AmazonReviewPageInjector(tab);
      await injector.waitForPageLoad();
      for (let star = 1; star <= 5; star++) {
        await injector.showStarsDropDownMenu();
        await injector.selectStar(star);
        while (true) {
          await injector.waitForPageLoad();
          const reviews = await injector.getSinglePageReviews();
          if (reviews.length > 0) {
            this.channel.emit('item-review-collected', { asin, reviews });
          }
          const hasNextPage = await injector.jumpToNextPageIfExist();
          if (!hasNextPage) {
            break;
          }
        }
      }
    } finally {
      // Close the tab even when the walk failed, so failed runs do not pile up tabs.
      setTimeout(() => browser.tabs.remove(tab.id!), 1000);
    }
  }

  /**
   * Shared task loop: handles the items one by one until the list is empty or
   * `stop()` was called, reporting the remaining items after each one.
   *
   * The interrupt listener is removed in `finally`, so it does not leak when
   * `handle` or `progress` throws.
   */
  private async runQueue(
    items: string[],
    handle: (item: string) => Promise<unknown>,
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    const remains = [...items];
    let interrupt = false;
    const unsubscribe = this._controlChannel.on('interrupt', () => {
      interrupt = true;
    });
    try {
      while (remains.length > 0 && !interrupt) {
        const item = remains.shift()!;
        await handle(item);
        await progress?.(remains);
      }
    } finally {
      unsubscribe();
    }
  }

  /** Searches each keyword in turn and collects all of its result pages. */
  public async runSearchPageTask(
    keywordsList: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runQueue(
      keywordsList,
      async (kw) => {
        await this.doSearch(kw);
        await this.wanderSearchPage();
      },
      progress,
    );
  }

  /** Collects the detail page of each ASIN (or detail URL) in turn. */
  public async runDetaiPageTask(
    asins: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runQueue(asins, (asin) => this.wanderDetailPage(asin), progress);
  }

  /** Collects all reviews of each ASIN in turn. */
  public async runReviewPageTask(
    asins: string[],
    progress?: (remains: string[]) => Promise<void>,
  ): Promise<void> {
    await this.runQueue(asins, (asin) => this.wanderReviewPage(asin), progress);
  }

  /** Asks the running task loop to stop; the loop checks the flag between items. */
  public stop(): Promise<void> {
    return this._controlChannel.emit('interrupt');
  }
}
|
||||
|
||||
/** Module facade: hands out the shared Amazon page worker. */
export default {
  /** Returns the singleton `AmazonPageWorker`. */
  useAmazonPageWorker: (): AmazonPageWorker => AmazonPageWorkerImpl.getInstance(),
};
|
||||
75
src/logic/page-worker/homedepot.ts
Normal file
75
src/logic/page-worker/homedepot.ts
Normal file
@ -0,0 +1,75 @@
|
||||
import Emittery from 'emittery';
|
||||
import { HomedepotEvents, HomedepotWorker } from './types';
|
||||
import { Tabs } from 'webextension-polyfill';
|
||||
import { isForbiddenUrl } from '~/env';
|
||||
import { withErrorHandling } from '../error-handler';
|
||||
import { HomedepotDetailPageInjector } from '../web-injectors';
|
||||
|
||||
/**
 * Singleton worker that opens Home Depot product detail pages one by one and
 * publishes the collected item on `channel`.
 */
class HomedepotWorkerImpl implements HomedepotWorker {
  private static _instance: HomedepotWorker | null = null;

  /** Returns the shared worker instance, creating it on first use. */
  public static getInstance() {
    if (!HomedepotWorkerImpl._instance) {
      HomedepotWorkerImpl._instance = new HomedepotWorkerImpl();
    }
    return HomedepotWorkerImpl._instance as HomedepotWorker;
  }

  private constructor() {}

  /** Public channel on which collected items and errors are emitted. */
  readonly channel: Emittery<HomedepotEvents> = new Emittery();

  /** Internal channel; `stop()` emits `interrupt` on it to end a running task loop. */
  private readonly _controlChannel = new Emittery<{ interrupt: undefined }>();

  /**
   * Returns the active tab of the current window, or `undefined` when the
   * query yields no tab.
   */
  private async getCurrentTab(): Promise<Tabs.Tab | undefined> {
    const [tab] = await browser.tabs.query({ active: true, currentWindow: true });
    return tab;
  }

  /** Opens `url` in a new, focused tab. */
  private async createNewTab(url?: string): Promise<Tabs.Tab> {
    return browser.tabs.create({ url, active: true });
  }

  /**
   * Loads the detail page of one product and emits `detail-item-collected`.
   *
   * Reuses the active tab unless there is none, it has no URL, or its URL is
   * forbidden; in those cases a new tab is opened instead.
   */
  @withErrorHandling
  private async wanderingDetailPage(OSMID: string) {
    const url = `https://www.homedepot.com/p/${OSMID}`;
    let tab = await this.getCurrentTab();
    if (!tab?.url || isForbiddenUrl(tab.url)) {
      tab = await this.createNewTab(url);
    } else {
      await browser.tabs.update(tab.id!, { url });
    }
    const injector = new HomedepotDetailPageInjector(tab);
    await injector.waitForPageLoad();
    const info = await injector.getInfo();
    this.channel.emit('detail-item-collected', { item: { OSMID, ...info } });
  }

  /**
   * Collects the detail page of every id in `OSMIDs`, in order, until the list
   * is empty or `stop()` was called.
   *
   * @param OSMIDs product ids; each is appended to `https://www.homedepot.com/p/`.
   * @param progress called (and awaited) after each item with the ids still pending.
   */
  async runDetailPageTask(
    OSMIDs: string[],
    progress?: (remains: string[]) => Promise<void> | void,
  ): Promise<void> {
    const remains = [...OSMIDs];
    let interrupt = false;
    const unsubscribe = this._controlChannel.on('interrupt', () => {
      interrupt = true;
    });
    try {
      while (remains.length > 0 && !interrupt) {
        const osmid = remains.shift()!;
        await this.wanderingDetailPage(osmid);
        await progress?.(remains);
      }
    } finally {
      // Always drop the listener, even when an item or the callback throws.
      unsubscribe();
    }
  }

  /** Asks the running task loop to stop; the loop checks the flag between items. */
  stop(): Promise<void> {
    return this._controlChannel.emit('interrupt');
  }
}
|
||||
|
||||
/** Module facade: hands out the shared Home Depot worker. */
export default {
  /** Returns the singleton Home Depot worker. */
  useHomedepotWorker: () => HomedepotWorkerImpl.getInstance(),
};
|
||||
@ -1,285 +1,4 @@
|
||||
import Emittery from 'emittery';
|
||||
import type { AmazonDetailItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types';
|
||||
import type { Tabs } from 'webextension-polyfill';
|
||||
import { withErrorHandling } from '../error-handler';
|
||||
import {
|
||||
AmazonDetailPageInjector,
|
||||
AmazonReviewPageInjector,
|
||||
AmazonSearchPageInjector,
|
||||
} from '../web-injectors';
|
||||
import amazon from './amazon';
|
||||
import homedepot from './homedepot';
|
||||
|
||||
/**
|
||||
* AmazonPageWorkerImpl can run on background & sidepanel & popup,
|
||||
* **can't** run on content script!
|
||||
*/
|
||||
class AmazonPageWorkerImpl implements AmazonPageWorker {
|
||||
//#region Singleton
|
||||
private static _instance: AmazonPageWorker | null = null;
|
||||
public static getInstance() {
|
||||
if (this._instance === null) {
|
||||
this._instance = new AmazonPageWorkerImpl();
|
||||
}
|
||||
return this._instance;
|
||||
}
|
||||
//#endregion
|
||||
|
||||
private constructor() {}
|
||||
|
||||
private _controlChannel = new Emittery<{ interrupt: undefined }>();
|
||||
public readonly channel = new Emittery<AmazonPageWorkerEvents>();
|
||||
|
||||
private async getCurrentTab(): Promise<Tabs.Tab> {
|
||||
const tab = await browser.tabs
|
||||
.query({ active: true, currentWindow: true })
|
||||
.then((tabs) => tabs[0]);
|
||||
return tab;
|
||||
}
|
||||
|
||||
private async createNewTab(url: string): Promise<Tabs.Tab> {
|
||||
const tab = await browser.tabs.create({
|
||||
url,
|
||||
active: true,
|
||||
});
|
||||
return tab;
|
||||
}
|
||||
|
||||
private async wanderSearchSinglePage(tab: Tabs.Tab) {
|
||||
const injector = new AmazonSearchPageInjector(tab);
|
||||
// #region Wait for the Next button to appear, indicating that the product items have finished loading
|
||||
await injector.waitForPageLoaded();
|
||||
// #endregion
|
||||
// #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1
|
||||
const pagePattern = await injector.getPagePattern();
|
||||
// #endregion
|
||||
// #region Retrieve key nodes and their information from the critical product search page
|
||||
const data = await injector.getPageData(pagePattern);
|
||||
// #endregion
|
||||
// #region get current page
|
||||
const page = await injector.getCurrentPage();
|
||||
// #endregion
|
||||
// #region Determine if it is the last page, otherwise navigate to the next page
|
||||
const hasNextPage = await injector.determineHasNextPage();
|
||||
// #endregion
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
if (data === null || typeof hasNextPage !== 'boolean') {
|
||||
this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url });
|
||||
throw new Error('爬取单页信息失败');
|
||||
}
|
||||
return { data, hasNextPage, page };
|
||||
}
|
||||
|
||||
@withErrorHandling
|
||||
public async doSearch(keywords: string): Promise<string> {
|
||||
const url = new URL('https://www.amazon.com/s');
|
||||
url.searchParams.append('k', keywords);
|
||||
let tab = await this.getCurrentTab();
|
||||
if (!tab.url?.startsWith('http')) {
|
||||
tab = await this.createNewTab('https://www.amazon.com/');
|
||||
tab.url = 'https://www.amazon.com/';
|
||||
}
|
||||
const currentUrl = new URL(tab.url!);
|
||||
if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) {
|
||||
tab = await browser.tabs.update(tab.id, { url: url.toString(), active: true });
|
||||
await new Promise<void>((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
@withErrorHandling
|
||||
public async wanderSearchPage(): Promise<void> {
|
||||
let tab = await this.getCurrentTab();
|
||||
let offset = 0;
|
||||
while (true) {
|
||||
const { hasNextPage, data, page } = await this.wanderSearchSinglePage(tab);
|
||||
const keywords = new URL(tab.url!).searchParams.get('k')!;
|
||||
const objs = data.map((r, i) => ({
|
||||
...r,
|
||||
keywords,
|
||||
page,
|
||||
rank: offset + 1 + i,
|
||||
createTime: new Date().toLocaleString(),
|
||||
asin: /(?<=\/dp\/)[A-Z0-9]{10}/.exec(r.link as string)![0],
|
||||
}));
|
||||
this.channel.emit('item-links-collected', { objs });
|
||||
offset += data.length;
|
||||
if (!hasNextPage) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
@withErrorHandling
|
||||
public async wanderDetailPage(entry: string) {
|
||||
//#region Initial Meta Info
|
||||
const params = { asin: '', url: '' };
|
||||
if (entry.match(/^https?:\/\/www\.amazon\.com.*\/dp\/[A-Z0-9]{10}/)) {
|
||||
const [asin] = /\/\/dp\/[A-Z0-9]{10}/.exec(entry)!;
|
||||
params.asin = asin;
|
||||
params.url = entry;
|
||||
} else if (entry.match(/^[A-Z0-9]{10}$/)) {
|
||||
params.asin = entry;
|
||||
params.url = `https://www.amazon.com/dp/${entry}`;
|
||||
}
|
||||
let tab = await this.getCurrentTab();
|
||||
if (!tab.url || !tab.url.startsWith('http')) {
|
||||
tab = await this.createNewTab(params.url);
|
||||
} else {
|
||||
tab = await browser.tabs.update(tab.id, {
|
||||
url: params.url,
|
||||
});
|
||||
}
|
||||
const injector = new AmazonDetailPageInjector(tab);
|
||||
//#endregion
|
||||
//#region Await Production Introduction Element Loaded
|
||||
await injector.waitForPageLoaded();
|
||||
await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds.
|
||||
//#endregion
|
||||
//#region Fetch Base Info
|
||||
const baseInfo = await injector.getBaseInfo();
|
||||
this.channel.emit('item-base-info-collected', {
|
||||
asin: params.asin,
|
||||
title: baseInfo.title,
|
||||
price: baseInfo.price,
|
||||
});
|
||||
//#endregion
|
||||
//#region Fetch Rating Info
|
||||
const ratingInfo = await injector.getRatingInfo();
|
||||
if (ratingInfo && (ratingInfo.rating !== 0 || ratingInfo.ratingCount !== 0)) {
|
||||
this.channel.emit('item-rating-collected', {
|
||||
asin: params.asin,
|
||||
...ratingInfo,
|
||||
});
|
||||
}
|
||||
//#endregion
|
||||
//#region Fetch Category Rank Info
|
||||
let rawRankingText: string | null = await injector.getRankText();
|
||||
if (rawRankingText) {
|
||||
const info: Pick<AmazonDetailItem, 'category1' | 'category2'> = {};
|
||||
let statement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/.exec(rawRankingText)?.[0];
|
||||
if (statement) {
|
||||
const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/\s\(See\sTop.+\)/, '');
|
||||
const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
|
||||
if (name && !Number.isNaN(rank)) {
|
||||
info['category1'] = { name, rank };
|
||||
}
|
||||
rawRankingText = rawRankingText.replace(statement, '');
|
||||
}
|
||||
statement = /#[0-9,]+\sin\s\S[\s\w',\.&\(\)\-]+/.exec(rawRankingText)?.[0];
|
||||
if (statement) {
|
||||
const name = /(?<=in\s).+/.exec(statement)?.[0].replace(/[\s]+$/, '');
|
||||
const rank = Number(/(?<=#)[0-9,]+/.exec(statement)?.[0].replaceAll(',', ''));
|
||||
if (name && !Number.isNaN(rank)) {
|
||||
info['category2'] = { name, rank };
|
||||
}
|
||||
}
|
||||
this.channel.emit('item-category-rank-collected', {
|
||||
asin: params.asin,
|
||||
...info,
|
||||
});
|
||||
}
|
||||
//#endregion
|
||||
//#region Fetch Goods' Images
|
||||
const imageUrls = await injector.getImageUrls();
|
||||
imageUrls.length > 0 &&
|
||||
this.channel.emit('item-images-collected', {
|
||||
asin: params.asin,
|
||||
imageUrls: Array.from(new Set(imageUrls)),
|
||||
});
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds.
|
||||
//#endregion
|
||||
//#region Fetch Top Reviews
|
||||
const reviews = await injector.getTopReviews();
|
||||
reviews.length > 0 &&
|
||||
this.channel.emit('item-top-reviews-collected', {
|
||||
asin: params.asin,
|
||||
topReviews: reviews,
|
||||
});
|
||||
//#endregion
|
||||
}
|
||||
|
||||
@withErrorHandling
|
||||
public async wanderReviewPage(asin: string) {
|
||||
const url = new URL(
|
||||
`https://www.amazon.com/product-reviews/${asin}/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews`,
|
||||
);
|
||||
const tab = await this.createNewTab(url.toString());
|
||||
const injector = new AmazonReviewPageInjector(tab);
|
||||
await injector.waitForPageLoad();
|
||||
for (let star = 1; star <= 5; star++) {
|
||||
await injector.showStarsDropDownMenu();
|
||||
await injector.selectStar(star);
|
||||
while (true) {
|
||||
await injector.waitForPageLoad();
|
||||
const reviews = await injector.getSinglePageReviews();
|
||||
reviews.length > 0 && this.channel.emit('item-review-collected', { asin, reviews });
|
||||
const hasNextPage = await injector.jumpToNextPageIfExist();
|
||||
if (!hasNextPage) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
setTimeout(() => browser.tabs.remove(tab.id!), 1000);
|
||||
}
|
||||
|
||||
public async runSearchPageTask(
|
||||
keywordsList: string[],
|
||||
progress?: (remains: string[]) => Promise<void>,
|
||||
): Promise<void> {
|
||||
let remains = [...keywordsList];
|
||||
let interrupt = false;
|
||||
const unsubscribe = this._controlChannel.on('interrupt', () => {
|
||||
interrupt = true;
|
||||
});
|
||||
while (remains.length > 0 && !interrupt) {
|
||||
const kw = remains.shift()!;
|
||||
await this.doSearch(kw);
|
||||
await this.wanderSearchPage();
|
||||
progress && progress(remains);
|
||||
}
|
||||
unsubscribe();
|
||||
}
|
||||
|
||||
public async runDetaiPageTask(
|
||||
asins: string[],
|
||||
progress?: (remains: string[]) => Promise<void>,
|
||||
): Promise<void> {
|
||||
let remains = [...asins];
|
||||
let interrupt = false;
|
||||
const unsubscribe = this._controlChannel.on('interrupt', () => {
|
||||
interrupt = true;
|
||||
});
|
||||
while (remains.length > 0 && !interrupt) {
|
||||
const asin = remains.shift()!;
|
||||
await this.wanderDetailPage(asin);
|
||||
progress && progress(remains);
|
||||
}
|
||||
unsubscribe();
|
||||
}
|
||||
|
||||
public async runReviewPageTask(
|
||||
asins: string[],
|
||||
progress?: (remains: string[]) => Promise<void>,
|
||||
): Promise<void> {
|
||||
let remains = [...asins];
|
||||
while (remains.length > 0) {
|
||||
const asin = remains.shift()!;
|
||||
await this.wanderReviewPage(asin);
|
||||
progress && progress(remains);
|
||||
}
|
||||
}
|
||||
|
||||
public async stop(): Promise<void> {
|
||||
this._controlChannel.emit('interrupt');
|
||||
}
|
||||
}
|
||||
|
||||
class PageWorker {
|
||||
public useAmazonPageWorker(): AmazonPageWorker {
|
||||
return AmazonPageWorkerImpl.getInstance();
|
||||
}
|
||||
}
|
||||
|
||||
const pageWorker = new PageWorker();
|
||||
|
||||
export default pageWorker;
|
||||
export { amazon, homedepot };
|
||||
|
||||
50
src/logic/page-worker/types.d.ts
vendored
50
src/logic/page-worker/types.d.ts
vendored
@ -39,48 +39,64 @@ type AmazonItem = Pick<AmazonSearchItem, 'asin'> &
|
||||
Partial<AmazonSearchItem> &
|
||||
Partial<AmazonDetailItem> & { hasDetail: boolean };
|
||||
|
||||
type HomedepotDetailItem = {
|
||||
OSMID: string;
|
||||
link: string;
|
||||
brandName: string;
|
||||
title: string;
|
||||
price: string;
|
||||
rate: string;
|
||||
innerText: string;
|
||||
reviewCount: number;
|
||||
mainImageUrl: string;
|
||||
};
|
||||
|
||||
interface AmazonPageWorkerEvents {
|
||||
/**
|
||||
* The event is fired when worker collected links to items on the Amazon search page.
|
||||
*/
|
||||
['item-links-collected']: { objs: AmazonSearchItem[] };
|
||||
|
||||
/**
|
||||
* The event is fired when worker collected goods' base info on the Amazon detail page.
|
||||
*/
|
||||
['item-base-info-collected']: Pick<AmazonDetailItem, 'asin' | 'title' | 'price'>;
|
||||
|
||||
/**
|
||||
* The event is fired when worker collected goods' rating on the Amazon detail page.
|
||||
*/
|
||||
['item-rating-collected']: Pick<AmazonDetailItem, 'asin' | 'rating' | 'ratingCount'>;
|
||||
|
||||
/**
|
||||
* The event is fired when worker collected goods' category rank on the Amazon detail page.
|
||||
*/
|
||||
['item-category-rank-collected']: Pick<AmazonDetailItem, 'asin' | 'category1' | 'category2'>;
|
||||
|
||||
/**
|
||||
* The event is fired when images collected
|
||||
*/
|
||||
['item-images-collected']: Pick<AmazonDetailItem, 'asin' | 'imageUrls'>;
|
||||
|
||||
/**
|
||||
* The event is fired when top reviews collected in detail page
|
||||
*/
|
||||
['item-top-reviews-collected']: Pick<AmazonDetailItem, 'asin' | 'topReviews'>;
|
||||
|
||||
/**
|
||||
* The event is fired when reviews collected in all review page
|
||||
*/
|
||||
['item-review-collected']: { asin: string; reviews: AmazonReview[] };
|
||||
|
||||
/**
|
||||
* Error event that occurs when there is an issue with the Amazon page worker
|
||||
*/
|
||||
['error']: { message: string; url?: string };
|
||||
}
|
||||
|
||||
interface HomedepotEvents {
|
||||
/**
|
||||
* The event is fired when a detail item is collected.
|
||||
*/
|
||||
['detail-item-collected']: { item: HomedepotDetailItem };
|
||||
/**
|
||||
* The event is fired when error occurs.
|
||||
*/
|
||||
['error']: { message: string; url?: string };
|
||||
}
|
||||
|
||||
interface AmazonPageWorker {
|
||||
/**
|
||||
* The channel for communication with the Amazon page worker.
|
||||
@ -120,3 +136,23 @@ interface AmazonPageWorker {
|
||||
*/
|
||||
stop(): Promise<void>;
|
||||
}
|
||||
|
||||
interface HomedepotWorker {
|
||||
/**
|
||||
* The channel for communication with the Homedepot page worker.
|
||||
*/
|
||||
readonly channel: Emittery<HomedepotEvents>;
|
||||
|
||||
/**
|
||||
* Browsing goods detail page and collect target information
|
||||
*/
|
||||
runDetailPageTask(
|
||||
OSMIDs: string[],
|
||||
progress?: (remains: string[]) => Promise<void> | void,
|
||||
): Promise<void>;
|
||||
|
||||
/**
|
||||
* Stop the worker.
|
||||
*/
|
||||
stop(): Promise<void>;
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { exec } from './execute-script';
|
||||
import type { Tabs } from 'webextension-polyfill';
|
||||
import type { AmazonReview, AmazonSearchItem } from './page-worker/types';
|
||||
import type { AmazonReview, AmazonSearchItem, HomedepotDetailItem } from './page-worker/types';
|
||||
|
||||
class BaseInjector {
|
||||
readonly _tab: Tabs.Tab;
|
||||
@ -235,40 +235,24 @@ export class AmazonDetailPageInjector extends BaseInjector {
|
||||
|
||||
/**
 * Collects the product's high-resolution image URLs.
 *
 * Reads the text of the inline `<script>` whose content starts with
 * `P.when('A').register` and extracts every `"hiRes":"…"` URL hosted on
 * `m.media-amazon.com`. Duplicates are not removed here.
 *
 * @returns the matched URLs in document order; empty when no such script or
 *          no `hiRes` entry is found.
 */
public async getImageUrls() {
  return this.run(async () => {
    const script = document.evaluate(
      `//script[starts-with(text(), "\nP.when(\'A\').register")]`,
      document,
      null,
      XPathResult.STRING_TYPE,
    ).stringValue;
    const urls = [
      ...script.matchAll(
        // Dots in the host name are escaped so they only match a literal ".".
        /(?<="hiRes":")https:\/\/m\.media-amazon\.com\/images\/I\/[\w\d\.\-+]+(?=")/g,
      ),
    ].map((e) => e[0]);
    return urls;
  });
}
|
||||
@ -372,8 +356,6 @@ export class AmazonReviewPageInjector extends BaseInjector {
|
||||
);
|
||||
const items: AmazonReview[] = [];
|
||||
for (let i = 0; i < xResult.snapshotLength; i++) {
|
||||
console.log('handling', i);
|
||||
|
||||
const commentNode = xResult.snapshotItem(i) as HTMLDivElement;
|
||||
if (!commentNode) {
|
||||
continue;
|
||||
@ -459,3 +441,64 @@ export class AmazonReviewPageInjector extends BaseInjector {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Injector for Home Depot product detail pages.
 *
 * NOTE(review): the callbacks passed to `this.run` are presumably executed in
 * the page, so they are kept self-contained (no references to outer scope) —
 * confirm against `BaseInjector.run`.
 */
export class HomedepotDetailPageInjector extends BaseInjector {
  /**
   * Waits until the document is complete and the ratings-and-reviews
   * placeholder is gone.
   *
   * Each round expands the collapsed overview section (if any) and scrolls the
   * placeholder into view. Once ready, it waits a second and scrolls the
   * review section to the centre.
   */
  public waitForPageLoad() {
    return this.run(async () => {
      while (true) {
        document
          .querySelector<HTMLElement>(
            `#product-section-overview div[role='button'][aria-expanded='false']`,
          )
          ?.click();
        const reviewPlaceholderEl = document.querySelector(
          `[data-component^="ratings-and-reviews"] [class^="placeholder"]`,
        );
        reviewPlaceholderEl?.scrollIntoView({ behavior: 'smooth' });
        if (document.readyState === 'complete' && !reviewPlaceholderEl) {
          await new Promise((resolve) => setTimeout(resolve, 1000));
          document
            .querySelector(`#product-section-rr`)
            ?.scrollIntoView({ behavior: 'smooth', block: 'center' });
          break;
        }
        // Yield between polls: without this the loop never returns control to
        // the page, so the state being polled could never change.
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    });
  }

  /**
   * Reads the product's link, brand, title, price, rating, review count and
   * main image URL from the current document.
   *
   * Brand, title, price and main image are required and throw when their
   * element is missing. Rating and review count fall back to `''` and `0`
   * when the page shows none.
   */
  public getInfo() {
    return this.run(async () => {
      const link = document.location.toString();
      const brandName = document.querySelector<HTMLDivElement>(
        `[data-component^="product-details:ProductDetailsBrandCollection"]`,
      )!.innerText;
      const title = document.querySelector<HTMLDivElement>(
        `[data-component^="product-details:ProductDetailsTitle"]`,
      )!.innerText;
      const price = document.querySelector<HTMLDivElement>(`#standard-price`)!.innerText;
      const rateText =
        document.querySelector<HTMLDivElement>(
          `[data-component^="ratings-and-reviews"] .sui-mr-1`,
        )?.innerText ?? '';
      const rate = /\d\.\d/.exec(rateText)?.[0] ?? '';
      const reviewCountText =
        document.querySelector<HTMLDivElement>(
          `[data-component^="ratings-and-reviews"] button > span:last-child`,
        )?.innerText ?? '';
      // Accept thousands separators ("1,234") instead of stopping at the first comma.
      const reviewCountMatch = /\d[\d,]*/.exec(reviewCountText)?.[0];
      const reviewCount = reviewCountMatch ? Number(reviewCountMatch.replace(/,/g, '')) : 0;
      const mainImageUrl = document.querySelector<HTMLImageElement>(
        `.mediagallery__mainimage img`,
      )!.src;
      return {
        link,
        brandName,
        title,
        price,
        rate,
        reviewCount,
        mainImageUrl,
      } as Omit<HomedepotDetailItem, 'OSMID'>;
    });
  }
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
<script setup lang="ts">
|
||||
import { useLongTask } from '~/composables/useLongTask';
|
||||
import pageWorker from '~/logic/page-worker';
|
||||
import { amazon as pageWorker } from '~/logic/page-worker';
|
||||
import { AmazonDetailItem } from '~/logic/page-worker/types';
|
||||
import { detailAsinInput, detailItems } from '~/logic/storage';
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
<script lang="ts" setup>
|
||||
import { useLongTask } from '~/composables/useLongTask';
|
||||
import pageWorker from '~/logic/page-worker';
|
||||
import { amazon as pageWorker } from '~/logic/page-worker';
|
||||
import type { AmazonReview } from '~/logic/page-worker/types';
|
||||
import { reviewAsinInput, reviewItems } from '~/logic/storage';
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
<script setup lang="ts">
|
||||
import { keywordsList } from '~/logic/storage';
|
||||
import pageWorker from '~/logic/page-worker';
|
||||
import { amazon as pageWorker } from '~/logic/page-worker';
|
||||
import { NButton } from 'naive-ui';
|
||||
import { searchItems } from '~/logic/storage';
|
||||
import { useLongTask } from '~/composables/useLongTask';
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user