mirror of
https://github.com/primedigitaltech/azon_seeker.git
synced 2026-01-31 11:49:18 +08:00
782 lines
28 KiB
TypeScript
782 lines
28 KiB
TypeScript
import { BaseInjector } from './base';
|
||
|
||
export class AmazonSearchPageInjector extends BaseInjector {
|
||
/**
 * Waits for an Amazon search results page to finish rendering, scrolling
 * through it while waiting.
 *
 * Sequence (all inside the callback handed to `this.run`):
 * 1. Sleep 500–1000 ms.
 * 2. Poll roughly every 500 ms until `.s-pagination-next` exists or
 *    `document.readyState === 'complete'`. Then smooth-scroll to about
 *    90–98 % of the document height and, if the pagination button exists,
 *    centre it in the viewport.
 * 3. Poll every 500–1000 ms until at least one `.rhf-sign-in-button`
 *    element is present, then sleep once more.
 *
 * NOTE(review): step 3 has no upper bound; if that element never shows up
 * the returned promise never settles. Earlier revisions (previously kept as
 * commented-out code) waited for `.a-spinner` / `.a-carousel-card-empty`
 * to disappear instead.
 */
public waitForPageLoaded() {
  return this.run(async () => {
    // NOTE(review): every helper is declared inside this callback; presumably
    // `run` executes the function in the page, so it has to be self-contained.
    // Confirm against BaseInjector before hoisting anything out.

    // Upper bound for a single smooth scroll. `scrollend` is not dispatched
    // when no scrolling actually happens (or in browsers lacking the event),
    // so waiting on it alone can hang forever.
    const SCROLL_END_TIMEOUT_MS = 3000;

    const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

    /** Sleeps `base` ms plus a random integer jitter in `[0, spread)`. */
    const jitterSleep = (base: number, spread: number) =>
      sleep(base + ~~(spread * Math.random()));

    /**
     * Runs `startScroll` and resolves on the next document `scrollend`
     * event, or after SCROLL_END_TIMEOUT_MS, whichever comes first.
     */
    const waitForScrollEnd = (startScroll: () => void): Promise<void> =>
      new Promise((resolve) => {
        let timer: ReturnType<typeof setTimeout> | undefined;
        const finish = () => {
          clearTimeout(timer);
          document.removeEventListener('scrollend', finish);
          resolve();
        };
        document.addEventListener('scrollend', finish, { once: true });
        timer = setTimeout(finish, SCROLL_END_TIMEOUT_MS);
        startScroll();
      });

    /**
     * Smoothly scrolls the window and waits for the scroll to end.
     * @param options native ScrollToOptions (top, left, behavior)
     */
    const modernScrollTo = (options: ScrollToOptions): Promise<void> => {
      const targetTop = options.top ?? window.scrollY;
      const targetLeft = options.left ?? window.scrollX;
      // Already at the requested position: nothing will scroll.
      if (targetTop === window.scrollY && targetLeft === window.scrollX) {
        return Promise.resolve();
      }
      return waitForScrollEnd(() => window.scrollTo(options));
    };

    /**
     * Scrolls an element into view and waits for the scroll to end.
     * @param element target DOM element
     * @param options scroll configuration
     */
    const scrollIntoViewAndWait = (
      element: HTMLElement,
      options: ScrollIntoViewOptions = { behavior: 'smooth', block: 'center' },
    ): Promise<void> => waitForScrollEnd(() => element.scrollIntoView(options));

    await jitterSleep(500, 500);

    // Phase 1: wait for the pagination button or a completed document.
    while (true) {
      const nextButton = document.querySelector<HTMLElement>('.s-pagination-next');
      await jitterSleep(500, 50);
      if (!nextButton && document.readyState !== 'complete') {
        continue;
      }

      const pageHeight = Math.max(
        document.documentElement.scrollHeight,
        document.body.scrollHeight,
        document.documentElement.offsetHeight,
        document.body.offsetHeight,
        document.documentElement.clientHeight,
      );
      await modernScrollTo({
        top: pageHeight * (0.9 + Math.random() * 0.08),
        behavior: 'smooth',
      });
      await jitterSleep(250, 50);

      // The document can be complete without any pagination button (e.g. a
      // single page of results); only centre the button when it exists.
      if (nextButton) {
        await scrollIntoViewAndWait(nextButton, { behavior: 'smooth', block: 'center' });
        await jitterSleep(250, 50);
      }
      break;
    }

    // Phase 2: wait until a `.rhf-sign-in-button` element is in the DOM.
    while (true) {
      await jitterSleep(500, 500);
      if (document.querySelectorAll('.rhf-sign-in-button').length > 0) {
        await jitterSleep(500, 500);
        break;
      }
    }
  });
}
|
||
// public waitForPageLoaded() {
|
||
// return this.run(async () => {
|
||
// await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random())));
|
||
|
||
// // 第一个 while 循环:添加 12 秒超时刷新机制
|
||
// const timeoutMs = 12000; // 12 秒
|
||
// let startTime = Date.now();
|
||
// let refreshCount = 0;
|
||
// const maxRefresh = 1; // 最多刷新 1 次
|
||
// console.log('waitForPageLoaded');
|
||
|
||
// while (true) {
|
||
// const targetNode = document.querySelector('.s-pagination-next');
|
||
// window.scrollBy(0, ~~(Math.random() * 500) + 500);
|
||
// await new Promise((resolve) => setTimeout(resolve, ~~(Math.random() * 50) + 500));
|
||
|
||
// if (targetNode || document.readyState === 'complete') {
|
||
// targetNode?.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
||
// break;
|
||
// }
|
||
|
||
// // 检查是否超时
|
||
// if (Date.now() - startTime > timeoutMs) {
|
||
// if (refreshCount < maxRefresh) {
|
||
// // 刷新页面
|
||
// console.log('检查是否超时,刷新页面');
|
||
|
||
// browser.tabs.query({ active: true, currentWindow: true }).then(tabs => {
|
||
// browser.tabs.reload(tabs[0].id);
|
||
// });
|
||
|
||
// await new Promise((resolve) => setTimeout(resolve, 2000)); // 等待刷新开始
|
||
|
||
// // 重置计时器和刷新计数
|
||
// refreshCount++;
|
||
// startTime = Date.now();
|
||
// continue; // 重新从 while 循环开始
|
||
// } else {
|
||
// // 已刷新过一次,仍然超时,抛出错误
|
||
// throw new Error('等待页面加载超时(已尝试刷新)');
|
||
// }
|
||
// }
|
||
// }
|
||
|
||
// // 第二个 while 循环:等待 spinner 消失
|
||
// while (true) {
|
||
// await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random())));
|
||
// const spins = Array.from(document.querySelectorAll('.a-spinner')).filter(
|
||
// (e) => e.getClientRects().length > 0
|
||
// );
|
||
// if (spins.length === 0) {
|
||
// break;
|
||
// }
|
||
// }
|
||
// });
|
||
// }
|
||
|
||
/**
 * Classifies the layout of the current search results page.
 *
 * @returns `'pattern-1'` when at least one rendered element matches the
 * `.puisg-row:has(...)` selector below, otherwise `'pattern-2'`.
 */
public async getPagePattern() {
  return this.run(async () => {
    const listRowSelector =
      '.puisg-row:has(.a-section.a-spacing-small.a-spacing-top-small:not(.a-text-right))';
    const listRows = Array.from(document.querySelectorAll<HTMLElement>(listRowSelector));
    // An element without client rects is not rendered.
    const hasRenderedRow = listRows.some((row) => row.getClientRects().length > 0);
    return hasRenderedRow ? 'pattern-1' : 'pattern-2';
  });
}
|
||
|
||
/**
 * Extracts link, title, image URL and price for every rendered result card
 * on the current search page.
 *
 * @param pattern layout reported by `getPagePattern`: `'pattern-1'` reads
 * `.puisg-row` rows (title taken from the heading's `aria-label`),
 * `'pattern-2'` reads `.puis-card-container` cards (title taken from the
 * heading text).
 * @returns the extracted items whose link path contains `/dp/`, or `null`
 * for an unrecognised pattern.
 */
public async getPageData(pattern: 'pattern-1' | 'pattern-2') {
  type SearchRow = Pick<AmazonSearchItem, 'link' | 'title' | 'imageSrc' | 'price'>;
  type ExtractOptions = {
    itemSelector: string;
    linkSelector: string;
    titleFromAriaLabel: boolean;
  };

  // Single in-page extractor shared by both layouts. It only touches its
  // argument and page globals, and receives its options through `run`'s
  // second argument, the same way `selectStar` passes `{ star }`.
  const extractCards = async (opts: ExtractOptions): Promise<SearchRow[]> => {
    const secondaryPriceXPath = `.//div[@data-cy="secondary-offer-recipe"]//span[@class='a-color-base' and contains(., '$') and not(*)]`;
    const rows: SearchRow[] = [];
    const cards = Array.from(document.querySelectorAll<HTMLElement>(opts.itemSelector)).filter(
      (card) => card.getClientRects().length > 0,
    );
    for (const card of cards) {
      const link = card.querySelector<HTMLAnchorElement>(opts.linkSelector)?.href;
      const heading = card.querySelector<HTMLElement>('h2.a-color-base');
      const image = card.querySelector<HTMLImageElement>('img.s-image');
      // Skip cards missing a link, heading or image instead of letting one
      // malformed card abort the whole page.
      if (!link || !heading || !image) {
        continue;
      }
      const title = opts.titleFromAriaLabel
        ? (heading.getAttribute('aria-label') ?? heading.innerText)
        : heading.innerText;
      // Main price first; otherwise fall back to the secondary-offer text.
      const price =
        card.querySelector<HTMLElement>('.a-price:not(.a-text-price) .a-offscreen')?.innerText ||
        (
          document.evaluate(
            secondaryPriceXPath,
            card,
            null,
            XPathResult.FIRST_ORDERED_NODE_TYPE,
          ).singleNodeValue as HTMLSpanElement | null
        )?.innerText;
      rows.push({ link, title, imageSrc: image.src, price });
    }
    return rows;
  };

  let data: SearchRow[] | null = null;
  switch (pattern) {
    // Products rendered as a list.
    case 'pattern-1':
      data = await this.run(extractCards, {
        itemSelector:
          '.puisg-row:has(.a-section.a-spacing-small.a-spacing-top-small:not(.a-text-right))',
        linkSelector: 'a',
        titleFromAriaLabel: true,
      });
      break;
    // Products rendered as a two-dimensional image grid.
    case 'pattern-2':
      data = await this.run(extractCards, {
        itemSelector: '.puis-card-container',
        linkSelector: 'a.a-link-normal',
        titleFromAriaLabel: false,
      });
      break;
    default:
      break;
  }

  // Keep only links whose path contains '/dp/'; the original comment
  // describes this as dropping advertisement entries. Unparseable links are
  // dropped rather than thrown on.
  const isProductLink = (link: string): boolean => {
    try {
      return new URL(link).pathname.includes('/dp/');
    } catch {
      return false;
    }
  };
  return data && data.filter((row) => isProductLink(row.link));
}
|
||
|
||
/**
 * Reads the currently selected page number from the pagination bar.
 *
 * @returns the numeric text of `.s-pagination-item.s-pagination-selected`,
 * or `1` when no such element exists.
 */
public async getCurrentPage() {
  return this.run(async () => {
    const selectedItem = document.querySelector<HTMLElement>(
      '.s-pagination-item.s-pagination-selected',
    );
    if (!selectedItem) {
      return 1;
    }
    return Number(selectedItem.innerText);
  });
}
|
||
|
||
/**
 * Checks whether the current Amazon search page has a next page and, if so,
 * clicks the "next" pagination button.
 *
 * Runs in the page: looks up `.s-pagination-next`, verifies it does not
 * carry the `s-pagination-disabled` class, waits a random 500–1000 ms
 * (to look less like a bot) and then clicks it.
 *
 * @returns `true` when a next page exists and the button was clicked (the
 * next page is then loading); `false` when the button is missing or disabled.
 */
public async determineHasNextPage() {
  return this.run(async () => {
    const nextControl = document.querySelector<HTMLElement>('.s-pagination-next');
    const isClickable =
      nextControl !== null && !nextControl.classList.contains('s-pagination-disabled');
    if (!isClickable) {
      return false;
    }
    const pauseMs = 500 + ~~(500 * Math.random());
    await new Promise((done) => setTimeout(done, pauseMs));
    nextControl.click();
    return true;
  });
}
|
||
// /**
|
||
// * 检测并执行亚马逊搜索页面翻页,等待页面刷新完成。
|
||
// * @returns 能否翻页(true=翻页成功,false=没有下一页或翻页失败)
|
||
// */
|
||
// public async determineHasNextPage(): Promise<boolean> {
|
||
// return this.run(async () => {
|
||
// const nextButton = document.querySelector<HTMLLinkElement>('.s-pagination-next');
|
||
|
||
// if (nextButton && !nextButton.classList.contains('s-pagination-disabled')) {
|
||
// // 记录当前页码
|
||
// const initialPage = await this.getCurrentPage();
|
||
|
||
// // 随机等待后点击
|
||
// await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random())));
|
||
// nextButton.click();
|
||
|
||
// // 等待页面刷新完成
|
||
// await this.waitForPageLoaded();
|
||
|
||
// // 验证翻页是否成功
|
||
// const newPage = await this.getCurrentPage();
|
||
|
||
// // 只有页码真正变化才算翻页成功
|
||
// return newPage !== null && newPage !== initialPage;
|
||
// }
|
||
|
||
// return false;
|
||
// });
|
||
// }
|
||
|
||
/**
|
||
* 检测并执行亚马逊搜索页面翻页,通过 URL 变化确认刷新完成。
|
||
* @returns 能否翻页(true=已翻页,false=已是最后一页)
|
||
*/
|
||
// public async determineHasNextPage(): Promise<boolean> {
|
||
// return this.run(async () => {
|
||
// const nextButton = document.querySelector<HTMLLinkElement>('.s-pagination-next');
|
||
|
||
// if (nextButton && !nextButton.classList.contains('s-pagination-disabled')) {
|
||
// // 1. 记录当前 URL
|
||
// const initialUrl = window.location.href;
|
||
|
||
// // 2. 随机等待后点击
|
||
// await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random())));
|
||
// nextButton.click();
|
||
|
||
// // 3. 等待 URL 变化(表示页面已开始导航)
|
||
// await new Promise<void>((resolve) => {
|
||
// const checkUrl = () => {
|
||
// if (window.location.href !== initialUrl) {
|
||
// resolve();
|
||
// } else {
|
||
// setTimeout(checkUrl, 100);
|
||
// }
|
||
// };
|
||
// checkUrl();
|
||
// });
|
||
|
||
// // 4. 等待页面稳定(document.readyState === 'complete')
|
||
// await new Promise<void>((resolve) => {
|
||
// const checkReadyState = () => {
|
||
// if (document.readyState === 'complete') {
|
||
// resolve();
|
||
// } else {
|
||
// setTimeout(checkReadyState, 100);
|
||
// }
|
||
// };
|
||
// checkReadyState();
|
||
// });
|
||
|
||
// // 5. 额外等待确保内容加载
|
||
// await new Promise((resolve) => setTimeout(resolve, 500));
|
||
|
||
// return true;
|
||
// }
|
||
|
||
// return false;
|
||
// });
|
||
// }
|
||
}
|
||
|
||
export class AmazonDetailPageInjector extends BaseInjector {
|
||
/**
 * Detects whether the current page is an error page.
 *
 * @returns `true` when an `<img>` whose `src` contains
 * `error/en_US/title` is present in the document.
 */
public async detectErrorPage(): Promise<boolean> {
  return this.run(async () => {
    const errorBannerSelector = 'img[src*="error/en_US/title"]';
    return document.querySelector(errorBannerSelector) !== null;
  });
}
|
||
|
||
/**
 * Waits for the product detail page to finish loading.
 *
 * Each round scrolls down by a random 500–999 px, sleeps 200–299 ms, and
 * then inspects the DOM.
 *
 * @returns `false` as soon as `.music-detail-header` or `.avu-retail-page`
 * is found; `true` once one of the detail sections exists and the document
 * is past the `'loading'` state (that section is scrolled to the centre
 * first). There is no timeout.
 */
public async waitForPageLoaded() {
  return this.run(async () => {
    const detailSectionSelector =
      '#prodDetails:has(td), #detailBulletsWrapper_feature_div:has(li), .av-page-desktop, #productDescription_feature_div';
    const unsupportedPageSelectors = ['.music-detail-header', '.avu-retail-page'];

    for (;;) {
      window.scrollBy(0, ~~(Math.random() * 500) + 500);
      await new Promise((done) => setTimeout(done, ~~(Math.random() * 100) + 200));

      const detailSection = document.querySelector(detailSectionSelector);
      const isUnsupportedPage = unsupportedPageSelectors.some(
        (selector) => document.querySelector(selector) !== null,
      );
      if (isUnsupportedPage) {
        return false;
      }
      if (detailSection !== null && document.readyState !== 'loading') {
        detailSection.scrollIntoView({ behavior: 'smooth', block: 'center' });
        return true;
      }
    }
  });
}
|
||
|
||
/**
 * Collects the basic fields of a product detail page.
 *
 * @returns `title` (from `#title`, which must exist), plus `price`,
 * `boughtInfo`, `availableDate`, `categories`, `shipFrom` and `soldBy`,
 * each `undefined` when its element is not found.
 */
public async getBaseInfo() {
  return this.run(async () => {
    /** innerText of the first element matching `selector`, if any. */
    const textOf = (selector: string) => document.querySelector<HTMLElement>(selector)?.innerText;

    const title = document.querySelector<HTMLElement>('#title')!.innerText;
    const price = textOf('.aok-offscreen, .a-price:not(.a-text-price) .a-offscreen');
    const boughtInfo = textOf(`#social-proofing-faceout-title-tk_bought`);

    // The date is the element right after the "Date First Available" label.
    const availableDateNode = document.evaluate(
      `//span[contains(text(), 'Date First Available')]/following-sibling::*[1]`,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
    ).singleNodeValue as HTMLElement | undefined;
    const availableDate = availableDateNode?.innerText;

    // Breadcrumb text with its line breaks removed.
    const categories = textOf('#wayfinding-breadcrumbs_feature_div')?.replaceAll('\n', '');
    const shipFrom = textOf('#fulfillerInfoFeature_feature_div > *:last-of-type');
    const soldBy = textOf(`#sellerProfileTriggerId`);

    return { title, price, boughtInfo, availableDate, categories, shipFrom, soldBy };
  });
}
|
||
|
||
/**
 * Reads the average rating and the number of ratings.
 *
 * `rating` is the first space-separated token of the `title` attribute of
 * `#acrPopover`; `ratingCount` is the first token of the `aria-label` of
 * `#acrCustomerReviewText` with thousands separators removed. Both are
 * looked up inside `#averageCustomerReviews`.
 *
 * @returns `{ rating, ratingCount }`; a value that is missing or not
 * numeric is reported as `0`.
 */
public async getRatingInfo() {
  return this.run(async () => {
    const review = document.querySelector('#averageCustomerReviews');
    const rating = Number(
      review?.querySelector('#acrPopover')?.getAttribute('title')?.split(' ')[0],
    );
    const ratingCount = Number(
      review
        ?.querySelector('#acrCustomerReviewText')
        ?.getAttribute('aria-label')
        ?.split(' ')[0]
        // Strip every comma: a string pattern would only remove the first
        // one, turning "1,234,567" into NaN.
        ?.replace(/,/g, ''),
    );
    // `|| 0` maps both NaN and 0 to 0.
    return {
      rating: rating || 0,
      ratingCount: ratingCount || 0,
    };
  });
}
|
||
|
||
/**
 * Finds the "Best Sellers Rank" text.
 *
 * Tries three XPath locations in order (detail bullets, `#prodDetails`
 * table, `#productDetails_db_sections` table). The first node found is
 * scrolled to the centre of the viewport.
 *
 * @returns the node's `innerText`, or `null` when none of them matches.
 */
public async getRankText() {
  return this.run(async () => {
    const rankLocators = [
      `//div[@id='detailBulletsWrapper_feature_div']//ul[.//li[contains(., 'Best Sellers Rank')]]//span[@class='a-list-item' and contains(., 'Best Sellers Rank')]`,
      `//div[@id='prodDetails']//table/tbody/tr[th[1][contains(text(), 'Best Sellers Rank')]]/td`,
      `//div[@id='productDetails_db_sections']//table/tbody/tr[th[1][contains(text(), 'Best Sellers Rank')]]/td`,
    ];
    const firstMatch = (expression: string) =>
      document.evaluate(
        expression,
        document,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null,
      ).singleNodeValue as HTMLElement | null;

    let rankNode: HTMLElement | null = null;
    for (const locator of rankLocators) {
      rankNode = firstMatch(locator);
      if (rankNode) {
        break;
      }
    }
    if (!rankNode) {
      return null;
    }
    rankNode.scrollIntoView({ behavior: 'smooth', block: 'center' });
    return rankNode.innerText;
  });
}
|
||
|
||
/**
 * Collects product image URLs.
 *
 * If an `.overlayRestOfImages` element exists and `#ivThumbs` is missing or
 * not rendered, the overlay is clicked and the method waits one second.
 * URLs are then pulled out of the inline `P.when('A').register` script:
 * `"hiRes"` entries first, falling back to `"large"` entries when there are
 * none. Finally any open popover is closed via its header close control.
 *
 * @returns the matched `https://m.media-amazon.com/images/I/...` URLs
 * (possibly empty).
 */
public async getImageUrls() {
  return this.run(async () => {
    const overlay = document.querySelector<HTMLElement>('.overlayRestOfImages');
    if (overlay) {
      const thumbs = document.querySelector<HTMLElement>('#ivThumbs');
      // A missing thumbnail strip is treated like a hidden one instead of
      // throwing on a null dereference.
      if (!thumbs || thumbs.getClientRects().length === 0) {
        overlay.click();
        await new Promise((resolve) => setTimeout(resolve, 1000));
      }
    }

    // STRING_TYPE yields the text of the first matching <script> node
    // (empty string when nothing matches).
    const script = document.evaluate(
      `//script[starts-with(text(), "\nP.when(\'A\').register") or contains(text(), "\nP.when('A').register")]`,
      document,
      null,
      XPathResult.STRING_TYPE,
    ).stringValue;

    const extractUrls = (pattern: RegExp) =>
      Array.from(script.matchAll(pattern)).map((match) => match[0]);

    // Dots in the host name are escaped so they only match a literal '.'.
    let urls = extractUrls(
      /(?<="hiRes":")https:\/\/m\.media-amazon\.com\/images\/I\/[\w.\-+]+(?=")/g,
    );
    if (urls.length === 0) {
      urls = extractUrls(
        /(?<="large":")https:\/\/m\.media-amazon\.com\/images\/I\/[\w.\-+]+(?=")/g,
      );
    }

    document.querySelector<HTMLElement>('header > [data-action="a-popover-close"]')?.click();
    return urls;
  });
}
|
||
|
||
/**
 * Collects the featured reviews shown on the product detail page.
 *
 * Scrolls `.cr-widget-FocalReviews` into view, polls once per second until
 * it is rendered, then reads every descendant `div` whose id contains
 * `review-card`.
 *
 * @returns one `AmazonReview` per card; `[]` when the widget is absent.
 * Throws if a card lacks one of the expected text nodes.
 */
public async getTopReviews() {
  return this.run(async () => {
    const targetNode = document.querySelector<HTMLElement>('.cr-widget-FocalReviews');
    if (!targetNode) {
      return [];
    }
    targetNode.scrollIntoView({ behavior: 'smooth', block: 'end' });
    // NOTE(review): no timeout; a widget that stays hidden blocks forever.
    while (targetNode.getClientRects().length === 0) {
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }

    const xResult = document.evaluate(
      `.//div[contains(@id, 'review-card')]`,
      targetNode,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    );
    const items: AmazonReview[] = [];
    for (let i = 0; i < xResult.snapshotLength; i++) {
      const commentNode = xResult.snapshotItem(i) as HTMLElement | null;
      if (!commentNode) {
        continue;
      }
      // The card id starts with the review id, followed by '-'.
      const id = commentNode.id.split('-')[0];
      const username = commentNode.querySelector<HTMLElement>('.a-profile-name')!.innerText;
      const title = commentNode.querySelector<HTMLElement>(
        '[data-hook="review-title"] > span:not(.a-letter-space)',
      )!.innerText;
      const rating = commentNode.querySelector<HTMLElement>(
        '[data-hook*="review-star-rating"]',
      )!.innerText;
      const dateInfo = commentNode.querySelector<HTMLElement>(
        '[data-hook="review-date"]',
      )!.innerText;
      const content = commentNode.querySelector<HTMLElement>(
        '[data-hook="review-body"]',
      )!.innerText;
      // Same selector as AmazonReviewPageInjector.getSinglePageReviews. The
      // previous `img[src] img[src]` could never match, because an <img>
      // cannot contain another element, so imageSrc was always empty.
      const imageSrc = Array.from(
        commentNode.querySelectorAll<HTMLImageElement>('.review-image-tile-section img[src]'),
      ).map((e) => {
        // Reduce the file name to "<first chunk>.<extension>", dropping the
        // dot-separated chunks in between.
        const url = new URL(e.getAttribute('src')!);
        const paths = url.pathname.split('/');
        const chunks = paths[paths.length - 1].split('.');
        paths[paths.length - 1] = `${chunks[0]}.${chunks[chunks.length - 1]}`;
        url.pathname = paths.join('/');
        return url.toString();
      });
      items.push({ id, username, title, rating, dateInfo, content, imageSrc });
    }
    return items;
  });
}
|
||
|
||
/**
 * Scrolls through the A+ content section (`#aplus_feature_div`) in 100 px
 * smooth steps, pausing 100–199 ms between steps.
 *
 * Scanning stops when the bottom edge of the section is less than 100 px
 * below the top of the viewport, when the section stops being rendered, or
 * when the window position has not changed for several consecutive steps
 * (the page cannot scroll any further).
 *
 * @returns `false` when the section is missing, not rendered or has zero
 * height; `true` after the scan.
 */
public async scanAPlus() {
  return this.run(async () => {
    const aplusEl = document.querySelector<HTMLElement>('#aplus_feature_div');
    if (
      !aplusEl ||
      aplusEl.getClientRects().length === 0 ||
      aplusEl.getClientRects()[0].height === 0
    ) {
      return false;
    }

    // Number of consecutive steps without any scroll movement after which
    // the scan gives up instead of looping forever.
    const MAX_STALLED_STEPS = 10;
    let stalledSteps = 0;
    let lastScrollY = Number.NaN;

    aplusEl.scrollIntoView({ behavior: 'smooth', block: 'start' });
    while (true) {
      const rect = aplusEl.getClientRects()[0];
      // `rect` is undefined if the section stopped being rendered mid-scan.
      if (!rect || rect.top + rect.height < 100) {
        break;
      }
      if (window.scrollY === lastScrollY) {
        stalledSteps += 1;
        if (stalledSteps >= MAX_STALLED_STEPS) {
          break;
        }
      } else {
        stalledSteps = 0;
        lastScrollY = window.scrollY;
      }
      window.scrollBy({ top: 100, behavior: 'smooth' });
      await new Promise((resolve) => setTimeout(resolve, 100 + ~~(100 * Math.random())));
    }
    return true;
  });
}
|
||
|
||
/**
 * Captures a screenshot of the A+ content section.
 *
 * @returns whatever `this.screenshot` yields for the CSS selector
 * `#aplus_feature_div`.
 */
public async captureAPlus() {
  const selector = '#aplus_feature_div';
  return this.screenshot({ type: 'CSS', selector });
}
|
||
|
||
/**
 * Collects additional product attributes from the detail page.
 *
 * @returns `abouts` (the bullet texts under "About this item") plus
 * `brand`, `flavor`, `unitCount`, `itemForm` and `productDimensions`; each
 * is `undefined` when its XPath matches nothing.
 */
public async getExtraInfo() {
  return this.run(async () => {
    /** All nodes matching `xpath` in document order, or `undefined` if none. */
    const selectAll = <T extends HTMLElement>(xpath: string): T[] | undefined => {
      const snapshot = document.evaluate(
        xpath,
        document,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null,
      );
      const matches = Array.from(
        { length: snapshot.snapshotLength },
        (_, index) => snapshot.snapshotItem(index)! as T,
      );
      return matches.length > 0 ? matches : undefined;
    };

    /** innerText of the first node matching `xpath`, if any. */
    const firstText = (xpath: string) => selectAll(xpath)?.[0].innerText;

    /** Text of the element following the one that wraps a `<span>` reading exactly `label`. */
    const valueAfterLabel = (label: string) =>
      firstText(`//*[./span[normalize-space(text())='${label}']]/following-sibling::*[1]`);

    const aboutItems = selectAll(
      `//*[normalize-space(text())='About this item']/following-sibling::ul[1]/li`,
    );

    return {
      abouts: aboutItems?.map((item) => item.innerText),
      brand: valueAfterLabel('Brand'),
      flavor: valueAfterLabel('Flavor'),
      unitCount: valueAfterLabel('Unit Count'),
      itemForm: valueAfterLabel('Item Form'),
      productDimensions: firstText(
        `//span[contains(text(), 'Dimensions')]/following-sibling::*[1]`,
      ),
    };
  });
}
|
||
}
|
||
|
||
export class AmazonReviewPageInjector extends BaseInjector {
|
||
/**
 * Waits for the review page to be ready.
 *
 * 1. Sleeps one second.
 * 2. Polls once per second until the review list container exists, is
 *    rendered and the document is past the `'loading'` state, scrolling the
 *    container to the centre on every round.
 * 3. Polls once per second until no `.reviews-loading` element is visible.
 *
 * NOTE(review): neither polling loop has a timeout.
 */
public async waitForPageLoad() {
  return this.run(async () => {
    const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

    await sleep(1000);
    while (true) {
      const targetNode = document.querySelector(
        '.reviews-content, #cm_cr-review_list ul[role="list"]:not(.histogram)',
      );
      targetNode?.scrollIntoView({ behavior: 'smooth', block: 'center' });
      if (
        targetNode &&
        targetNode.getClientRects().length > 0 &&
        document.readyState !== 'loading'
      ) {
        break;
      }
      await sleep(1000);
    }

    while (true) {
      const loadingNode = document.querySelector('.reviews-loading');
      // Done when the loading indicator is hidden or not in the DOM at all.
      // Previously a missing indicator kept this loop spinning forever.
      if (!loadingNode || loadingNode.getClientRects().length === 0) {
        break;
      }
      await sleep(1000);
    }
  });
}
|
||
|
||
/**
 * Reads every review card on the current review page.
 *
 * Waits one second, then collects all `div` descendants of
 * `#cm_cr-review_list` whose id contains `review-card`.
 *
 * @returns one `AmazonReview` per card; `[]` when the list container is
 * absent. Throws if a card lacks one of the expected text nodes.
 */
public async getSinglePageReviews() {
  return this.run(async () => {
    const reviewList = document.querySelector('#cm_cr-review_list');
    if (!reviewList) {
      return [];
    }
    await new Promise((done) => setTimeout(done, 1000));

    const snapshot = document.evaluate(
      `.//div[contains(@id, 'review-card')]`,
      reviewList,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    );
    const cards = Array.from(
      { length: snapshot.snapshotLength },
      (_, index) => snapshot.snapshotItem(index) as HTMLElement,
    );

    /** innerText of the required child matching `selector`. */
    const requiredText = (card: HTMLElement, selector: string) =>
      card.querySelector<HTMLElement>(selector)!.innerText;

    /** Rewrites the file name to "<first chunk>.<extension>". */
    const normalizeImageUrl = (image: HTMLImageElement) => {
      const url = new URL(image.getAttribute('src')!);
      const segments = url.pathname.split('/');
      const nameParts = segments[segments.length - 1].split('.');
      segments[segments.length - 1] = `${nameParts[0]}.${nameParts[nameParts.length - 1]}`;
      url.pathname = segments.join('/');
      return url.toString();
    };

    const reviews: AmazonReview[] = [];
    for (const card of cards) {
      if (!card) {
        continue;
      }
      reviews.push({
        // The card id starts with the review id, followed by '-'.
        id: card.id.split('-')[0],
        username: requiredText(card, '.a-profile-name'),
        title: requiredText(card, '[data-hook="review-title"] > span:not(.a-letter-space)'),
        rating: requiredText(card, '[data-hook*="review-star-rating"]'),
        dateInfo: requiredText(card, '[data-hook="review-date"]'),
        content: requiredText(card, '[data-hook="review-body"]'),
        imageSrc: Array.from(
          card.querySelectorAll<HTMLImageElement>('.review-image-tile-section img[src]'),
        ).map(normalizeImageUrl),
      });
    }
    return reviews;
  });
}
|
||
|
||
/**
 * Scrolls to the last review, then to the pagination bar, and clicks the
 * "next page" link when it is enabled. One second is waited after each
 * scroll.
 *
 * @returns `true` when the second pagination entry exists without the
 * `a-disabled` class (its link is then clicked), `false` when it is
 * disabled, `null` when it does not exist.
 */
public jumpToNextPageIfExist() {
  return this.run(async () => {
    const pause = () => new Promise((done) => setTimeout(done, 1000));

    const lastReview = document.evaluate(
      `//*[@id='cm_cr-review_list']//li[@data-hook='review'][last()]`,
      document.body,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
    ).singleNodeValue as HTMLElement | null;
    if (lastReview) {
      lastReview.scrollIntoView({ behavior: 'smooth', block: 'center' });
    }
    await pause();

    const nextPageItem = document.querySelector<HTMLElement>(
      '[data-hook="pagination-bar"] .a-pagination > *:nth-of-type(2)',
    );
    if (nextPageItem) {
      nextPageItem.scrollIntoView({ behavior: 'smooth', block: 'center' });
    }
    await pause();

    const canAdvance = nextPageItem && !nextPageItem.classList.contains('a-disabled');
    if (canAdvance) {
      nextPageItem?.querySelector('a')?.click();
    }
    return canAdvance;
  });
}
|
||
|
||
/**
 * Opens the star-filter dropdown (`#star-count-dropdown`) and resolves once
 * its `aria-expanded` attribute is `'true'`.
 *
 * The state is checked before every click, and after a click the method
 * polls for a while before clicking again. Clicking unconditionally every
 * 100 ms could toggle a menu that was already open, or still opening, back
 * to closed.
 *
 * Throws if the dropdown element does not exist.
 */
public async showStarsDropDownMenu() {
  return this.run(async () => {
    const POLL_INTERVAL_MS = 100;
    // Polls granted to a single click before the click is retried.
    const POLLS_PER_CLICK = 10;
    const isExpanded = (el: HTMLElement) => el.getAttribute('aria-expanded') === 'true';

    while (true) {
      const dropdown = document.querySelector<HTMLElement>('#star-count-dropdown')!;
      if (isExpanded(dropdown)) {
        break;
      }
      dropdown.scrollIntoView({ behavior: 'smooth', block: 'center' });
      dropdown.click();
      for (let poll = 0; poll < POLLS_PER_CLICK && !isExpanded(dropdown); poll++) {
        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
      }
    }
  });
}
|
||
|
||
/**
 * Clicks the "<star> star only" entry of the open star-filter list box and
 * waits 100 ms.
 *
 * @param star star count to filter by; it is interpolated into the option
 * label that is looked up.
 * Throws if no such option is found.
 */
public async selectStar(star: number) {
  return this.run(
    async (payload) => {
      const optionXPath = `//ul[@role='listbox']/li/a[text()="${payload.star} star only"]`;
      const option = document.evaluate(
        optionXPath,
        document.body,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
      ).singleNodeValue as HTMLElement;
      option.click();
      await new Promise((done) => setTimeout(done, 100));
    },
    { star },
  );
}
|
||
}
|