Skip to content

Commit e1877b3

Browse files
committed
抓取数据逻辑优化;减少次数,提高速率,节约流量
1 parent b6d656f commit e1877b3

File tree

2 files changed

+51
-20
lines changed

2 files changed

+51
-20
lines changed

lib/db/getSiteData.js

+15-19
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { getConfigMapFromConfigPage } from '@/lib/notion/getNotionConfig'
77
import getPageProperties, {
88
adjustPageProperties
99
} from '@/lib/notion/getPageProperties'
10-
import { getPostBlocks, getSingleBlock } from '@/lib/notion/getPostBlocks'
10+
import { fetchInBatches, getPostBlocks } from '@/lib/notion/getPostBlocks'
1111
import { compressImage, mapImgUrl } from '@/lib/notion/mapImage'
1212
import { deepClone } from '@/lib/utils'
1313
import { idToUuid } from 'notion-utils'
@@ -371,13 +371,14 @@ const EmptyData = pageId => {
371371
* @returns {Promise<JSX.Element|null|*>}
372372
*/
373373
async function getDataBaseInfoByNotionAPI({ pageId, from }) {
374+
console.log('[Fetching Data]', pageId, from)
374375
const pageRecordMap = await getPostBlocks(pageId, from)
375376
if (!pageRecordMap) {
376377
console.error('can`t get Notion Data ; Which id is: ', pageId)
377378
return {}
378379
}
379380
pageId = idToUuid(pageId)
380-
const block = pageRecordMap.block || {}
381+
let block = pageRecordMap.block || {}
381382
const rawMetadata = block[pageId]?.value
382383
// Check Type Page-Database和Inline-Database
383384
if (
@@ -402,6 +403,7 @@ async function getDataBaseInfoByNotionAPI({ pageId, from }) {
402403
collectionView,
403404
viewIds
404405
)
406+
405407
if (pageIds?.length === 0) {
406408
console.error(
407409
'获取到的文章列表为空,请检查notion模板',
@@ -415,29 +417,22 @@ async function getDataBaseInfoByNotionAPI({ pageId, from }) {
415417
// console.log('有效Page数量', pageIds?.length)
416418
}
417419

418-
// 获取每篇文章基础数据
420+
// 抓取主数据库最多抓取1000个blocks,溢出的数block这里统一抓取一遍
421+
const blockIdsNeedFetch = []
419422
for (let i = 0; i < pageIds.length; i++) {
420423
const id = pageIds[i]
421424
const value = block[id]?.value
422425
if (!value) {
423-
// 如果找不到文章对应的block,说明发生了溢出,使用pageID再去请求
424-
const pageBlock = await getSingleBlock(id, from)
425-
if (pageBlock.block[id].value) {
426-
const properties =
427-
(await getPageProperties(
428-
id,
429-
pageBlock.block[id].value,
430-
schema,
431-
null,
432-
getTagOptions(schema)
433-
)) || null
434-
if (properties) {
435-
collectionData.push(properties)
436-
}
437-
}
438-
continue
426+
blockIdsNeedFetch.push(id)
439427
}
428+
}
429+
const fetchedBlocks = await fetchInBatches(blockIdsNeedFetch)
430+
block = Object.assign({}, block, fetchedBlocks)
440431

432+
// 获取每篇文章基础数据
433+
for (let i = 0; i < pageIds.length; i++) {
434+
const id = pageIds[i]
435+
const value = block[id]?.value || fetchedBlocks[id]?.value
441436
const properties =
442437
(await getPageProperties(
443438
id,
@@ -446,6 +441,7 @@ async function getDataBaseInfoByNotionAPI({ pageId, from }) {
446441
null,
447442
getTagOptions(schema)
448443
)) || null
444+
449445
if (properties) {
450446
collectionData.push(properties)
451447
}

lib/notion/getPostBlocks.js

+36-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ export async function getSingleBlock(id, from) {
4141
return pageBlock
4242
}
4343

44-
pageBlock = await getPageWithRetry(id, from)
44+
pageBlock = await getPageWithRetry(id, 'single_' + from)
4545

4646
if (pageBlock) {
4747
await setDataToCache(cacheKey, pageBlock)
@@ -153,3 +153,38 @@ function filterPostBlocks(id, blockMap, slice) {
153153
}
154154
return clonePageBlock
155155
}
156+
157+
/**
 * Fetch Notion blocks for a list of ids, issuing one `getBlocks` request per batch.
 *
 * Per the original author's note, a database listing drops blocks beyond a
 * certain count, so the missing page blocks are fetched again by page id.
 *
 * @param {string[]} ids - Block/page ids to fetch. A non-array or empty value
 *   resolves to `{}` without creating an API client.
 * @param {number} [batchSize=100] - Maximum number of ids per request. Values
 *   that are not finite or are below 1 fall back to 100, because a zero or
 *   negative step would keep the batching loop from ever advancing.
 * @returns {Promise<Object>} Map of block id to block record, merged from the
 *   `recordMap.block` of every batch response (later batches win on duplicate keys).
 */
export const fetchInBatches = async (ids, batchSize = 100) => {
  // Nothing to fetch: skip client construction and the network entirely.
  if (!Array.isArray(ids) || ids.length === 0) {
    return {}
  }

  // Guard the loop step so the index always moves forward.
  const step =
    Number.isFinite(batchSize) && batchSize >= 1 ? Math.floor(batchSize) : 100

  const authToken = BLOG.NOTION_ACCESS_TOKEN || null
  const api = new NotionAPI({
    authToken,
    userTimeZone: Intl.DateTimeFormat().resolvedOptions().timeZone
  })

  const fetchedBlocks = {}
  for (let offset = 0; offset < ids.length; offset += step) {
    const batch = ids.slice(offset, offset + step)
    console.log('[API-->>请求] Fetching missing blocks', ids.length)
    const start = Date.now()
    const pageChunk = await api.getBlocks(batch)
    const end = Date.now()
    console.log(
      `[API<<--响应] 耗时:${end - start}ms Fetching missing blocks count:${ids.length} `
    )

    console.log('[API<<--响应]')
    // Merge in place; Object.assign ignores an undefined source, so a response
    // without recordMap.block simply contributes nothing.
    Object.assign(fetchedBlocks, pageChunk?.recordMap?.block)
  }
  return fetchedBlocks
}

0 commit comments

Comments
 (0)