mirror of
https://github.com/primedigitaltech/azon_seeker.git
synced 2026-01-19 13:13:22 +08:00
feature: split 2 patterns of search page & declare wanderDetailPage function
This commit is contained in:
parent
94fa61dfa1
commit
1dc76ffc20
3
.gitignore
vendored
3
.gitignore
vendored
@ -15,3 +15,6 @@ node_modules
|
||||
src/auto-imports.d.ts
|
||||
src/components.d.ts
|
||||
.eslintcache
|
||||
|
||||
**/test_data.ts
|
||||
**/TestPanel.vue
|
||||
@ -1,3 +1,4 @@
|
||||
{
|
||||
"singleQuote": true
|
||||
"singleQuote": true,
|
||||
"printWidth": 100
|
||||
}
|
||||
|
||||
8
.vscode/extensions.json
vendored
8
.vscode/extensions.json
vendored
@ -1,9 +1,3 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"vue.volar"
|
||||
// "antfu.iconify",
|
||||
// "antfu.unocss",
|
||||
// "dbaeumer.vscode-eslint",
|
||||
// "csstools.postcss"
|
||||
]
|
||||
"recommendations": ["vue.volar"]
|
||||
}
|
||||
|
||||
16
.vscode/launch.json
vendored
Normal file
16
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "msedge",
|
||||
"request": "attach",
|
||||
"name": "Attach to side panel",
|
||||
"webRoot": "${workspaceFolder}/src/",
|
||||
"port": 9222,
|
||||
"urlFilter": "chrome-extension://*"
|
||||
}
|
||||
]
|
||||
}
|
||||
5
.vscode/settings.json
vendored
5
.vscode/settings.json
vendored
@ -1,12 +1,11 @@
|
||||
{
|
||||
"cSpell.words": ["Vitesse"],
|
||||
"typescript.tsdk": "node_modules/typescript/lib",
|
||||
"vite.autoStart": false,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.fixAll.eslint": "explicit"
|
||||
},
|
||||
"files.associations": {
|
||||
"*.css": "postcss"
|
||||
},
|
||||
"prettier.tabWidth": 2
|
||||
"prettier.tabWidth": 2,
|
||||
"prettier.printWidth": 100
|
||||
}
|
||||
|
||||
17
src/logic/execute-script.ts
Normal file
17
src/logic/execute-script.ts
Normal file
@ -0,0 +1,17 @@
|
||||
/**
 * Runs `func` in the given tab through `browser.scripting.executeScript` and
 * returns the value produced by the last injection result.
 *
 * NOTE(review): per the scripting API the function is injected into the page,
 * so it should be self-contained and not rely on variables captured from the
 * calling module — confirm against the API documentation.
 *
 * @param tabId - Id of the tab the script is injected into.
 * @param func - Async function to execute inside the tab.
 * @returns The script's result, or `null` when there is no injection result,
 *          the script reported an error (which is logged), or the script
 *          produced no value.
 */
export async function executeScript<T>(tabId: number, func: () => Promise<T>): Promise<T | null> {
  const injectResults = await browser.scripting.executeScript({
    target: { tabId },
    func,
  });
  const ret = injectResults.pop();
  if (!ret) {
    // Nothing was injected; honour the declared `T | null` contract.
    return null;
  }
  if (ret.error) {
    console.error('注入脚本时发生错误', ret.error);
    return null;
  }
  // `??` keeps legitimate falsy results (false, 0, '') and only maps a missing value to null.
  return (ret.result ?? null) as T | null;
}
|
||||
@ -1,5 +1,7 @@
|
||||
import Emittery from 'emittery';
|
||||
import { AmazonPageWorker, AmazonPageWorkerEvents } from './types';
|
||||
import type { AmazonGoodsLinkItem, AmazonPageWorker, AmazonPageWorkerEvents } from './types';
|
||||
import Browser from 'webextension-polyfill';
|
||||
import { executeScript } from '../execute-script';
|
||||
|
||||
class AmazonPageWorkerImpl implements AmazonPageWorker {
|
||||
readonly channel = new Emittery<AmazonPageWorkerEvents>();
|
||||
@ -12,70 +14,123 @@ class AmazonPageWorkerImpl implements AmazonPageWorker {
|
||||
.query({ active: true, currentWindow: true })
|
||||
.then((tabs) => tabs[0]);
|
||||
const currentUrl = new URL(tab.url!);
|
||||
if (
|
||||
currentUrl.hostname !== url.hostname ||
|
||||
currentUrl.searchParams.get('k') !== keywords
|
||||
) {
|
||||
if (currentUrl.hostname !== url.hostname || currentUrl.searchParams.get('k') !== keywords) {
|
||||
await browser.tabs.update(tab.id, { url: url.toString() });
|
||||
await new Promise<void>((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
private async wanderSearchSinglePage() {
|
||||
const tab = await browser.tabs
|
||||
.query({ active: true, currentWindow: true })
|
||||
.then((tabs) => tabs[0]);
|
||||
const results = await browser.scripting.executeScript({
|
||||
target: { tabId: tab.id! },
|
||||
func: async () => {
|
||||
try {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, 500 + ~~(500 * Math.random())),
|
||||
);
|
||||
while (!document.querySelector('.s-pagination-strip')) {
|
||||
window.scrollBy(0, ~~(Math.random() * 500) + 500);
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
}
|
||||
const items = document.querySelectorAll<HTMLDivElement>(
|
||||
'.a-section.a-spacing-small.puis-padding-left-small',
|
||||
);
|
||||
const links: string[] = [];
|
||||
items.forEach((el) => {
|
||||
const link =
|
||||
el.querySelector<HTMLAnchorElement>('a.a-link-normal')?.href;
|
||||
link && links.push(link);
|
||||
});
|
||||
const nextButton =
|
||||
document.querySelector<HTMLLinkElement>('.s-pagination-next');
|
||||
if (
|
||||
nextButton &&
|
||||
!nextButton.classList.contains('s-pagination-disabled')
|
||||
) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, 500 + ~~(500 * Math.random())),
|
||||
);
|
||||
nextButton.click();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return links;
|
||||
} catch (e) {
|
||||
return null;
|
||||
}
|
||||
},
|
||||
/**
 * Scrapes the Amazon search result page currently loaded in `tab`, then tries
 * to move on to the next page.
 *
 * Each step runs as a script injected into the tab:
 * 1. scroll until the pagination strip exists (bounded wait),
 * 2. detect which of the two result layouts is shown,
 * 3. collect the link and title of every visible result,
 * 4. click the "next" button when it is enabled.
 *
 * @param tab - Tab that shows the search results.
 * @returns The collected items and whether a next page was opened.
 * @throws Error (after emitting an `error` event on the channel) when the page
 *         does not finish loading, the layout cannot be determined, or the
 *         page information cannot be collected.
 */
private async wanderSearchSinglePage(tab: Browser.Tabs.Tab) {
  const tabId = tab.id!;

  // #region Wait for the pagination strip to appear, indicating that the product items have finished loading
  const loaded = await executeScript(tabId, async () => {
    // Declared inside the function because the function is executed in the page.
    const maxWaitMs = 30000;
    const deadline = Date.now() + maxWaitMs;
    await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random())));
    while (!document.querySelector('.s-pagination-strip')) {
      // Give up instead of scrolling forever when the strip never shows up.
      if (Date.now() > deadline) {
        return false;
      }
      window.scrollBy(0, ~~(Math.random() * 500) + 500);
      await new Promise((resolve) => setTimeout(resolve, 10));
    }
    return true;
  });
  if (loaded !== true) {
    this.channel.emit('error', { message: '等待商品搜索页加载失败', url: tab.url });
    throw new Error('等待商品搜索页加载失败');
  }
  // #endregion

  // #region Determine the type of product search page https://github.com/primedigitaltech/azon_seeker/issues/1
  const pagePattern = await executeScript(tabId, async () => {
    const gridCells = Array.from(
      document.querySelectorAll<HTMLDivElement>(
        '.a-section.a-spacing-small.puis-padding-left-small',
      ),
    );
    // No visible grid cell means the results are rendered with the list layout.
    return gridCells.some((e) => e.getClientRects().length > 0) ? 'pattern-2' : 'pattern-1';
  });
  if (typeof pagePattern !== 'string') {
    this.channel.emit('error', { message: '无法判断商品搜索页类型', url: tab.url });
    throw new Error('无法判断商品搜索页类型');
  }
  // #endregion

  // #region Retrieve the key nodes and their information from the product search page
  let data: AmazonGoodsLinkItem[] | null = null;
  switch (pagePattern) {
    // Products are displayed as a list
    case 'pattern-1':
      data = await executeScript(tabId, async () => {
        const found: AmazonGoodsLinkItem[] = [];
        const candidates = Array.from(
          document.querySelectorAll<HTMLDivElement>(
            '.a-section.a-spacing-small.a-spacing-top-small:not(.a-text-right)',
          ),
        );
        for (const el of candidates) {
          // Skip elements that are not rendered.
          if (el.getClientRects().length === 0) {
            continue;
          }
          const link = el.querySelector<HTMLAnchorElement>('a')?.href;
          const title = el
            .querySelector<HTMLHeadingElement>('h2.a-color-base')
            ?.getAttribute('aria-label');
          if (link) {
            found.push({ link, title: title ?? '' });
          }
        }
        return found;
      });
      break;
    // Products are displayed as a two-dimensional image grid
    case 'pattern-2':
      data = await executeScript(tabId, async () => {
        const found: AmazonGoodsLinkItem[] = [];
        const candidates = Array.from(
          document.querySelectorAll<HTMLDivElement>(
            '.a-section.a-spacing-small.puis-padding-left-small',
          ),
        );
        for (const el of candidates) {
          // Skip elements that are not rendered.
          if (el.getClientRects().length === 0) {
            continue;
          }
          const link = el.querySelector<HTMLAnchorElement>('a.a-link-normal')?.href;
          const title = el.querySelector<HTMLHeadingElement>('h2.a-color-base')?.innerText;
          if (link) {
            found.push({ link, title: title ?? '' });
          }
        }
        return found;
      });
      break;
    default:
      break;
  }
  // #endregion

  // #region Determine if it is the last page, otherwise navigate to the next page
  const hasNextPage = await executeScript(tabId, async () => {
    const nextButton = document.querySelector<HTMLLinkElement>('.s-pagination-next');
    if (!nextButton) {
      throw new Error('Error: next page button not found');
    }
    if (nextButton.classList.contains('s-pagination-disabled')) {
      return false;
    }
    await new Promise((resolve) => setTimeout(resolve, 500 + ~~(500 * Math.random())));
    nextButton.click();
    return true;
  });
  // #endregion

  // Pause after the (possible) click before the result is handed back to the caller.
  await new Promise((resolve) => setTimeout(resolve, 1000));

  if (data === null || typeof hasNextPage !== 'boolean') {
    this.channel.emit('error', { message: '爬取单页信息失败', url: tab.url });
    throw new Error('爬取单页信息失败');
  }
  return { data, hasNextPage };
}
|
||||
|
||||
/**
 * Walks through the search result pages of the active tab of the current
 * window, emitting `item-links-collected` with the items of every page.
 *
 * The loop ends when a page reports that there is no next page, or when an
 * `error` event is observed on the channel. A failure in a single page
 * propagates to the caller as a rejected promise.
 */
public async wanderSearchList(): Promise<void> {
  const [tab] = await browser.tabs.query({ active: true, currentWindow: true });

  let stopSignal = false;
  // Subscribe once, before scraping starts, and always unsubscribe afterwards;
  // subscribing inside the loop would add one more listener per page.
  const stopListening = this.channel.on('error', () => {
    stopSignal = true;
  });

  try {
    let hasNextPage = true;
    while (hasNextPage && !stopSignal) {
      const result = await this.wanderSearchSinglePage(tab);
      // Listeners are notified asynchronously; scraping does not wait for them.
      void this.channel.emit('item-links-collected', { objs: result.data });
      hasNextPage = result.hasNextPage;
    }
  } finally {
    stopListening();
  }

  // Short pause before reporting completion to the caller.
  await new Promise<void>((resolve) => setTimeout(resolve, 1000));
}
|
||||
|
||||
/** Declared for detail-page crawling; the body is currently empty, so the returned promise resolves without doing any work. */
public async wanderDetailPage(): Promise<void> {}
|
||||
}
|
||||
|
||||
class PageWorkerFactory {
|
||||
|
||||
23
src/logic/page-worker/types.d.ts
vendored
23
src/logic/page-worker/types.d.ts
vendored
@ -1,13 +1,18 @@
|
||||
import type Emittery from 'emittery';
|
||||
|
||||
type AmazonGoodsLinkItem = { link: string; title: string };
|
||||
|
||||
/** Events published on the Amazon page worker's channel. */
interface AmazonPageWorkerEvents {
  /**
   * Emitted with the goods entries (link and title) collected from an Amazon search page.
   */
  ['item-links-collected']: { objs: AmazonGoodsLinkItem[] };

  /**
   * Emitted when there is an issue with the Amazon page worker.
   * `message` describes the failure; `url` is the page involved, when available.
   */
  ['error']: { message: string; url?: string };
}
|
||||
|
||||
interface AmazonPageWorker {
|
||||
/**
|
||||
@ -17,15 +22,19 @@ interface AmazonPageWorker {
|
||||
readonly channel: Emittery<AmazonPageWorkerEvents>;
|
||||
|
||||
/**
|
||||
* Search for a list of items on Amazon
|
||||
* Search for a list of goods on Amazon
|
||||
* @param keywords - The keywords to search for on Amazon.
|
||||
* @returns A promise that resolves to a string representing the search URL.
|
||||
*/
|
||||
doSearch(keywords: string): Promise<string>;
|
||||
|
||||
/**
|
||||
* Browsing item search page and collect links to those items.
|
||||
* @param entryUrl - The URL of the Amazon search page to start from.
|
||||
* Browse the goods search page and collect links to those goods.
|
||||
*/
|
||||
wanderSearchList(): Promise<void>;
|
||||
|
||||
/**
|
||||
* Browse the goods detail page and collect target information.
|
||||
*/
|
||||
wanderDetailPage(): Promise<void>;
|
||||
}
|
||||
|
||||
11
src/sidepanel/App.vue
Normal file
11
src/sidepanel/App.vue
Normal file
@ -0,0 +1,11 @@
|
||||
<script lang="ts" setup>
|
||||
import Sidepanel from './Sidepanel.vue';
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<n-dialog-provider>
|
||||
<n-message-provider>
|
||||
<sidepanel />
|
||||
</n-message-provider>
|
||||
</n-dialog-provider>
|
||||
</template>
|
||||
@ -1,28 +1,85 @@
|
||||
<script setup lang="ts">
|
||||
import { keywords } from '~/logic/storage';
|
||||
import pageWorker from '~/logic/page-worker';
|
||||
import type { AmazonGoodsLinkItem } from '~/logic/page-worker/types';
|
||||
import { NButton, type DataTableColumns } from 'naive-ui';
|
||||
|
||||
const links = ref<string[]>([]);
|
||||
const message = useMessage();
|
||||
const worker = pageWorker.createAmazonPageWorker();
|
||||
|
||||
type TableData = AmazonGoodsLinkItem & { rank: number };
|
||||
|
||||
const items = ref<AmazonGoodsLinkItem[]>([]);
|
||||
const page = reactive({ current: 1, size: 5 });
|
||||
// Column definitions of the result table: rank, title, and a button that
// navigates the active tab of the current window to the item's link.
const columns: DataTableColumns<TableData> = [
  { title: '排位', key: 'rank' },
  { title: '标题', key: 'title' },
  {
    title: '链接',
    key: 'link',
    render: (row) => {
      // Click handler: point the active tab at the row's link, if a tab is found.
      const goToLink = async () => {
        const activeTabs = await browser.tabs.query({
          active: true,
          currentWindow: true,
        });
        const target = activeTabs.pop();
        if (!target) {
          return;
        }
        await browser.tabs.update(target.id, { url: row.link });
      };
      const buttonProps = {
        type: 'primary',
        text: true,
        size: 'small',
        onClick: goToLink,
      } as const;
      return h(NButton, buttonProps, () => '前往');
    },
  },
];
|
||||
|
||||
// Rows of the page currently shown in the table, each tagged with its
// 1-based rank within the whole collected list.
const itemView = computed(() => {
  const offset = (page.current - 1) * page.size;
  const visible = items.value.slice(offset, offset + page.size);
  return visible.map((item, idx) => ({ ...item, rank: offset + idx + 1 }));
});
|
||||
|
||||
// Once the panel is mounted, append every batch of collected goods to the table data.
onMounted(() => {
  worker.channel.on('item-links-collected', (ev) => {
    // The payload carries `objs` (link + title entries); it has no `links` field.
    items.value = items.value.concat(ev.objs);
  });
});
|
||||
|
||||
const onSearch = async () => {
|
||||
/**
 * Starts a collection run for the current keywords: opens the search page,
 * walks through the result pages and reports progress through toast messages.
 * Does nothing when the keywords are blank.
 */
const onCollect = async () => {
  if (keywords.value.trim() === '') {
    return;
  }
  message.info('开始收集');
  // Subscribe for this run only; without unsubscribing, every click would add
  // another listener and each error would be shown several times.
  const stopListening = worker.channel.on('error', ({ message: msg }) => {
    message.error(msg);
  });
  try {
    await worker.doSearch(keywords.value);
    await worker.wanderSearchList();
    message.info('完成');
  } finally {
    stopListening();
  }
};
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<main class="side-panel">
|
||||
<n-space>
|
||||
<n-space class="app-header">
|
||||
<mdi-cat style="font-size: 60px; color: black" />
|
||||
<h1>Azon Seeker</h1>
|
||||
</n-space>
|
||||
@ -35,22 +92,27 @@ const onSearch = async () => {
|
||||
round
|
||||
placeholder="请输入关键词"
|
||||
/>
|
||||
<n-button round size="large" @click="onSearch">搜索</n-button>
|
||||
<n-button type="primary" round size="large" @click="onCollect">采集</n-button>
|
||||
</n-space>
|
||||
<div style="height: 10px"></div>
|
||||
<n-card class="result-content-container" title="结果框">
|
||||
<n-empty v-if="links.length === 0" description="还没有结果哦">
|
||||
<n-empty v-if="items.length === 0" description="还没有结果哦">
|
||||
<template #icon>
|
||||
<n-icon :size="50">
|
||||
<solar-cat-linear />
|
||||
</n-icon>
|
||||
</template>
|
||||
</n-empty>
|
||||
<n-list size="medium" v-else>
|
||||
<n-list-item v-for="(link, index) in links" :key="index">
|
||||
<n-tag :href="link" target="_blank">{{ link }}</n-tag>
|
||||
</n-list-item>
|
||||
</n-list>
|
||||
<n-space vertical v-else>
|
||||
<n-data-table :columns="columns" :data="itemView" />
|
||||
<!-- Page count is rounded up and never below 1, so an exact multiple of the page size does not add an empty page. -->
<n-pagination
  v-model:page="page.current"
  v-model:page-size="page.size"
  :page-count="Math.max(1, Math.ceil(items.length / page.size))"
  :page-sizes="[5, 10, 20]"
  show-size-picker
/>
|
||||
</n-space>
|
||||
</n-card>
|
||||
</main>
|
||||
</template>
|
||||
@ -58,13 +120,16 @@ const onSearch = async () => {
|
||||
<style lang="scss" scoped>
|
||||
.side-panel {
|
||||
width: 100%;
|
||||
height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 20px;
|
||||
|
||||
.app-header {
|
||||
margin-top: 100px;
|
||||
}
|
||||
|
||||
.search-input-box {
|
||||
min-width: 270px;
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import App from './Sidepanel.vue';
|
||||
import App from './App.vue';
|
||||
import { setupApp } from '~/logic/common-setup';
|
||||
import '../styles';
|
||||
|
||||
|
||||
@ -33,12 +33,7 @@ export const sharedConfig: UserConfig = {
|
||||
'webextension-polyfill': [['=', 'browser']],
|
||||
},
|
||||
{
|
||||
'naive-ui': [
|
||||
'useDialog',
|
||||
'useMessage',
|
||||
'useNotification',
|
||||
'useLoadingBar',
|
||||
],
|
||||
'naive-ui': ['useDialog', 'useMessage', 'useNotification', 'useLoadingBar'],
|
||||
},
|
||||
],
|
||||
dts: r('src/auto-imports.d.ts'),
|
||||
@ -54,6 +49,7 @@ export const sharedConfig: UserConfig = {
|
||||
IconsResolver({
|
||||
prefix: '',
|
||||
}),
|
||||
// auto import naive ui
|
||||
NaiveUiResolver(),
|
||||
],
|
||||
}),
|
||||
@ -67,10 +63,7 @@ export const sharedConfig: UserConfig = {
|
||||
enforce: 'post',
|
||||
apply: 'build',
|
||||
transformIndexHtml(html, { path }) {
|
||||
return html.replace(
|
||||
/"\/assets\//g,
|
||||
`"${relative(dirname(path), '/assets')}/`,
|
||||
);
|
||||
return html.replace(/"\/assets\//g, `"${relative(dirname(path), '/assets')}/`);
|
||||
},
|
||||
},
|
||||
],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user