refactor: reading markdown docs files (#37774)

* refactor: reading markdown docs files

Refactor how Markdown docs files are read:
- Reuses the same logic across search index generation & page generation
- Improves the indexed content for search:
  - Stops removing MDX components, which often contain useful
    information like Admonitions
  - Denormalizes Partials and CodeSamples for more complete content

This is a prerequisite step for implementing the "Copy docs as Markdown"
functionality.

Only touches regular guides for now, not federated ones.

* fix: tailwind build error (#37728)

We changed to default to ESM imports a while ago, which means local
builds are now breaking because the Tailwind config uses a require.
Changed to CJS for the Tailwind config file. (I have no idea how this
has been working on Vercel all this time.)

* style: prettier
This commit is contained in:
Charis
2025-08-13 11:37:14 -04:00
committed by GitHub
parent 3635bc1fec
commit e46ab9c1a2
21 changed files with 457 additions and 181 deletions
@@ -4,7 +4,9 @@ import { type OpenAIClientInterface } from '~/lib/openAi'
import { ApiError } from '../../utils'
import { POST } from '../route'
const contentEmbeddingMock = vi.fn().mockImplementation(async () => Result.ok([0.1, 0.2, 0.3]))
const contentEmbeddingMock = vi
.fn()
.mockImplementation(async () => Result.ok({ embedding: [0.1, 0.2, 0.3], tokenCount: 10 }))
const openAIMock: OpenAIClientInterface = {
createContentEmbedding: contentEmbeddingMock,
}
+11 -1
View File
@@ -64,6 +64,16 @@ export class NoDataError<Details extends ObjectOrNever = never> extends ApiError
}
}
/**
 * Error indicating a requested file could not be found on disk.
 *
 * The original error (e.g. the ENOENT from `fs`) is preserved as `cause`,
 * and optional structured details can be attached for API responses.
 */
export class FileNotFoundError<Details extends ObjectOrNever = never> extends Error {
  constructor(
    message: string,
    error: Error,
    public details?: Details
  ) {
    super(`FileNotFound: ${message}`, { cause: error })
    // Explicit name so logs and serialized errors identify this class
    // instead of the generic "Error" (also survives minification).
    this.name = 'FileNotFoundError'
  }
}
export class MultiError<ErrorType = unknown, Details extends ObjectOrNever = never> extends Error {
constructor(
message: string,
@@ -79,7 +89,7 @@ export class MultiError<ErrorType = unknown, Details extends ObjectOrNever = nev
appendError(message: string, error: ErrorType): this {
this.message = `${this.message}\n\t${message}`
;((this.cause ?? (this.cause = [])) as Array<ErrorType>).push(error)
;((this.cause ??= []) as Array<ErrorType>).push(error)
return this
}
}
+36 -25
View File
@@ -1,18 +1,20 @@
import matter from 'gray-matter'
import * as Sentry from '@sentry/nextjs'
import { fromMarkdown } from 'mdast-util-from-markdown'
import { gfmFromMarkdown } from 'mdast-util-gfm'
import { gfm } from 'micromark-extension-gfm'
import { type Metadata, type ResolvingMetadata } from 'next'
import { notFound } from 'next/navigation'
import { readFile, readdir } from 'node:fs/promises'
import { extname, join, sep } from 'node:path'
import { readdir } from 'node:fs/promises'
import { extname, join, relative, sep } from 'node:path'
import { extractMessageFromAnyError, FileNotFoundError } from '~/app/api/utils'
import { pluckPromise } from '~/features/helpers.fn'
import { cache_fullProcess_withDevCacheBust, existsFile } from '~/features/helpers.fs'
import type { OrPromise } from '~/features/helpers.types'
import { generateOpenGraphImageMeta } from '~/features/seo/openGraph'
import { BASE_PATH } from '~/lib/constants'
import { GUIDES_DIRECTORY, isValidGuideFrontmatter, type GuideFrontmatter } from '~/lib/docs'
import { GuideModelLoader } from '~/resources/guide/guideModelLoader'
import { newEditLink } from './GuidesMdx.template'
const PUBLISHED_SECTIONS = [
@@ -51,31 +53,40 @@ const getGuidesMarkdownInternal = async (slug: string[]) => {
notFound()
}
let mdx: string
try {
mdx = await readFile(fullPath, 'utf-8')
} catch {
// Not using console.error because this includes pages that are genuine
// 404s and clutters up the logs
console.log('Error reading Markdown at path: %s', fullPath)
const guide = (await GuideModelLoader.fromFs(relative(GUIDES_DIRECTORY, fullPath))).unwrap()
const content = guide.content ?? ''
const meta = guide.metadata ?? {}
if (!isValidGuideFrontmatter(meta)) {
throw Error(`Type of frontmatter is not valid for path: ${fullPath}`)
}
const editLink = newEditLink(
`supabase/supabase/blob/master/apps/docs/content/guides/${relPath}.mdx`
)
return {
pathname: `/guides/${slug.join('/')}` satisfies `/${string}`,
meta,
content,
editLink,
}
} catch (error: unknown) {
if (error instanceof FileNotFoundError) {
// Not using console.error because this includes pages that are genuine
// 404s and clutters up the logs
console.log('Could not read Markdown at path: %s', fullPath)
} else {
console.error(
'Error processing Markdown file at path: %s:\n\t%s',
fullPath,
extractMessageFromAnyError(error)
)
Sentry.captureException(error)
}
notFound()
}
const editLink = newEditLink(
`supabase/supabase/blob/master/apps/docs/content/guides/${relPath}.mdx`
)
const { data: meta, content } = matter(mdx)
if (!isValidGuideFrontmatter(meta)) {
throw Error('Type of frontmatter is not valid')
}
return {
pathname: `/guides/${slug.join('/')}` satisfies `/${string}`,
meta,
content,
editLink,
}
}
/**
+70
View File
@@ -113,6 +113,11 @@ export class Result<Ok, Error> {
return this as unknown as Result<Mapped, Error>
}
  /**
   * Transforms the Ok value with an async function.
   * Error results pass through unchanged — the mapping function is never called.
   */
  async mapAsync<Mapped>(fn: (data: Ok) => Promise<Mapped>): Promise<Result<Mapped, Error>> {
    if (this.isOk()) return Result.ok(await fn(this.internal.data!))
    return this as unknown as Result<Mapped, Error>
  }
mapError<MappedError>(fn: (error: Error) => MappedError): Result<Ok, MappedError> {
if (this.isOk()) return this as unknown as Result<Ok, MappedError>
return Result.error(fn(this.internal.error!))
@@ -147,6 +152,27 @@ export class Result<Ok, Error> {
return this.internal.data!
}
  /**
   * Returns the Ok value, or the result of the fallback factory for Error results.
   * The fallback is lazily evaluated — only invoked on the Error path.
   */
  unwrapOr(deflt: () => Ok): Ok {
    if (this.isOk()) return this.internal.data!
    return deflt()
  }

  /**
   * Returns the Error value.
   * @throws if called on an Ok result.
   */
  unwrapError(): Error {
    if (this.isOk()) {
      throw new Error(`UnwrapError called on Ok`)
    }
    return this.internal.error!
  }

  /**
   * Returns the Error value without throwing; nullish when this is an Ok result.
   */
  unwrapErrorSafe(): Error | null {
    return this.internal.error
  }

  /**
   * Returns whichever side is present: the Ok value for Ok results,
   * otherwise the Error value.
   */
  unwrapEither(): Ok | Error {
    if (this.isOk()) return this.unwrap()
    return this.unwrapError()
  }
join<OtherOk, OtherError>(
other: Result<OtherOk, OtherError>
): Result<[Ok, OtherOk], [Error, OtherError]> {
@@ -158,3 +184,47 @@ export class Result<Ok, Error> {
return Result.ok([this.internal.data!, other.internal.data!])
}
}
/**
 * A pair of values carried together — typically a successful payload (left)
 * alongside an optional accumulated error (right).
 */
export class Both<Left, Right> {
  private readonly left: Left
  private readonly right: Right

  constructor(left: Left, right: Right) {
    this.left = left
    this.right = right
  }

  /** Transforms the left value, leaving the right untouched. */
  mapLeft<NewLeft>(fn: (left: Left) => NewLeft): Both<NewLeft, Right> {
    return new Both(fn(this.left), this.right)
  }

  /** Transforms the right value, leaving the left untouched. */
  mapRight<NewRight>(fn: (right: Right) => NewRight): Both<Left, NewRight> {
    return new Both(this.left, fn(this.right))
  }

  /** Async variant of `mapLeft`. */
  async mapLeftAsync<NewLeft>(fn: (left: Left) => Promise<NewLeft>): Promise<Both<NewLeft, Right>> {
    return new Both(await fn(this.left), this.right)
  }

  /** Returns the left value. */
  unwrapLeft(): Left {
    return this.left
  }

  /** Returns the right value. */
  unwrapRight(): Right {
    return this.right
  }

  /** Collapses both sides into a single output value. */
  combine<Output>(fn: (left: Left, right: Right) => Output): Output {
    return fn(this.left, this.right)
  }

  /**
   * Converts to a Result: a truthy right becomes the Error, otherwise the
   * left becomes Ok. (Truthiness check: a right of 0/''/null counts as Ok.)
   */
  intoResult(): Result<Left, Right> {
    return this.right ? Result.error(this.right) : Result.ok(this.left)
  }
}
+3 -5
View File
@@ -1,7 +1,5 @@
type OrPromise<T> = T | Promise<T>
export type Json = string | number | boolean | { [key: string]: Json } | Json[]
type Json = string | number | boolean | { [key: string]: Json } | Json[]
export type OrPromise<T> = T | Promise<T>
type WithRequired<T, K extends keyof T> = T & { [P in K]-?: T[P] }
export type { Json, OrPromise, WithRequired }
export type WithRequired<T, K extends keyof T> = T & { [P in K]-?: T[P] }
+16 -4
View File
@@ -10,13 +10,18 @@ import { Result } from '~/features/helpers.fn'
type Embedding = Array<number>
export interface EmbeddingWithTokens {
embedding: Embedding
token_count: number
}
interface ModerationFlaggedDetails {
flagged: boolean
categories: OpenAI.Moderations.Moderation.Categories
}
export interface OpenAIClientInterface {
createContentEmbedding(text: string): Promise<Result<Embedding, ApiErrorGeneric>>
createContentEmbedding(text: string): Promise<Result<EmbeddingWithTokens, ApiErrorGeneric>>
}
let openAIClient: OpenAIClientInterface | null
@@ -26,7 +31,9 @@ class OpenAIClient implements OpenAIClientInterface {
constructor(private client: OpenAI) {}
async createContentEmbedding(text: string): Promise<Result<Embedding, ApiErrorGeneric>> {
async createContentEmbedding(
text: string
): Promise<Result<EmbeddingWithTokens, ApiErrorGeneric>> {
return await Result.tryCatchFlat(
this.createContentEmbeddingImpl.bind(this),
convertUnknownToApiError,
@@ -36,7 +43,7 @@ class OpenAIClient implements OpenAIClientInterface {
private async createContentEmbeddingImpl(
text: string
): Promise<Result<Embedding, ApiError<ModerationFlaggedDetails>>> {
): Promise<Result<EmbeddingWithTokens, ApiError<ModerationFlaggedDetails>>> {
const query = text.trim()
const moderationResponse = await this.client.moderations.create({ input: query })
@@ -55,7 +62,12 @@ class OpenAIClient implements OpenAIClientInterface {
input: query,
})
const [{ embedding: queryEmbedding }] = embeddingsResponse.data
return Result.ok(queryEmbedding)
const tokenCount = embeddingsResponse.usage.total_tokens
return Result.ok({
embedding: queryEmbedding,
token_count: tokenCount,
})
}
}
+23 -2
View File
@@ -1,11 +1,32 @@
import { createClient, type SupabaseClient } from '@supabase/supabase-js'
import { type Database as DatabaseGenerated } from 'common'
type Database = {
export type Database = {
content: DatabaseGenerated['content']
graphql_public: DatabaseGenerated['graphql_public']
public: {
Tables: DatabaseGenerated['public']['Tables']
Tables: Omit<DatabaseGenerated['public']['Tables'], 'page_section'> & {
page_section: Omit<
DatabaseGenerated['public']['Tables']['page_section'],
'Row' | 'Insert' | 'Update'
> & {
Row: Omit<DatabaseGenerated['public']['Tables']['page_section']['Row'], 'embedding'> & {
embedding: Array<number> | null
}
Insert: Omit<
DatabaseGenerated['public']['Tables']['page_section']['Insert'],
'embedding'
> & {
embedding?: Array<number> | null
}
Update: Omit<
DatabaseGenerated['public']['Tables']['page_section']['Update'],
'embedding'
> & {
embedding?: Array<number> | null
}
}
}
Views: DatabaseGenerated['public']['Views']
Functions: Omit<
DatabaseGenerated['public']['Functions'],
+1 -1
View File
@@ -2,7 +2,7 @@ import 'server-only'
import { createClient, type SupabaseClient } from '@supabase/supabase-js'
import { type Database } from 'common'
import { type Database } from '~/lib/supabase'
let supabaseAdminClient: SupabaseClient<Database> | null = null
+3 -2
View File
@@ -6,7 +6,7 @@
"scripts": {
"build": "next build",
"build:analyze": "ANALYZE=true next build",
"build:llms": "tsx ./scripts/llms.ts",
"build:llms": "tsx --conditions=react-server ./scripts/llms.ts",
"build:sitemap": "tsx ./internals/generate-sitemap.ts",
"clean": "rimraf .next .turbo node_modules features/docs/generated examples __generated__",
"codegen:examples": "shx cp -r ../../examples ./examples",
@@ -16,7 +16,7 @@
"dev": "concurrently --kill-others \"next dev --port 3001\" \"pnpm run dev:watch:troubleshooting\"",
"dev:secrets:pull": "AWS_PROFILE=supabase-dev node ../../scripts/getSecrets.js -n local/docs",
"dev:watch:troubleshooting": "node ./scripts/troubleshooting/watch.mjs",
"embeddings": "tsx scripts/search/generate-embeddings.ts",
"embeddings": "tsx --conditions=react-server scripts/search/generate-embeddings.ts",
"embeddings:refresh": "pnpm run embeddings --refresh",
"last-changed": "tsx scripts/last-changed.ts",
"last-changed:reset": "pnpm run last-changed -- --reset",
@@ -138,6 +138,7 @@
"@types/react-copy-to-clipboard": "^5.0.4",
"@types/react-dom": "catalog:",
"@types/unist": "^2.0.6",
"@types/uuid": "^10.0.0",
"api-types": "workspace:*",
"cheerio": "^1.0.0-rc.12",
"chokidar": "^4.0.3",
@@ -22,7 +22,7 @@ export abstract class SearchResultModel {
const includeFullContent = requestedFields.includes('content')
const embeddingResult = await openAI().createContentEmbedding(query)
return embeddingResult.flatMapAsync(async (embedding) => {
return embeddingResult.flatMapAsync(async ({ embedding }) => {
const matchResult = new Result(
await supabase().rpc('search_content', {
embedding,
@@ -49,7 +49,7 @@ export abstract class SearchResultModel {
const includeFullContent = requestedFields.includes('content')
const embeddingResult = await openAI().createContentEmbedding(query)
return embeddingResult.flatMapAsync(async (embedding) => {
return embeddingResult.flatMapAsync(async ({ embedding }) => {
const matchResult = new Result(
await supabase().rpc('search_content_hybrid', {
query_text: query,
+8
View File
@@ -3,23 +3,31 @@ import { type SearchResultInterface } from '../globalSearch/globalSearchInterfac
export class GuideModel implements SearchResultInterface {
public title?: string
public href?: string
public checksum?: string
public content?: string
public metadata?: Record<string, unknown>
public subsections: Array<SubsectionModel>
constructor({
title,
href,
checksum,
content,
metadata,
subsections,
}: {
title?: string
href?: string
checksum?: string
content?: string
metadata?: Record<string, unknown>
subsections?: Array<{ title?: string; href?: string; content?: string }>
}) {
this.title = title
this.href = href
this.checksum = checksum
this.content = content
this.metadata = metadata
this.subsections = subsections?.map((subsection) => new SubsectionModel(subsection)) ?? []
}
}
@@ -0,0 +1,201 @@
import matter from 'gray-matter'
import { promises as fs } from 'node:fs'
import { join, relative } from 'node:path'
import { extractMessageFromAnyError, FileNotFoundError, MultiError } from '~/app/api/utils'
import { preprocessMdxWithDefaults } from '~/features/directives/utils'
import { Both, Result } from '~/features/helpers.fn'
import { GUIDES_DIRECTORY } from '~/lib/docs'
import { processMdx } from '~/scripts/helpers.mdx'
import { GuideModel } from './guideModel'
/**
 * Determines if a file is hidden.
 *
 * A file is hidden if its name, or the name of any of its parent directories,
 * starts with an underscore.
 *
 * Splits on both `/` and `\` so that paths returned by `fs.readdir` on
 * Windows (which uses backslash separators) are detected correctly too.
 */
function isHiddenFile(path: string): boolean {
  return path.split(/[/\\]/).some((part) => part.startsWith('_'))
}
/**
 * Recursively walks a directory and collects all .mdx files that are not hidden.
 *
 * Directory read failures do not throw: they are appended to the shared
 * `multiError` accumulator and an empty list is returned instead.
 */
async function walkMdxFiles(
  dir: string,
  multiError: { current: MultiError | null }
): Promise<Array<string>> {
  const readDirResult = await Result.tryCatch(
    () => fs.readdir(dir, { recursive: true }),
    (error) => error
  )
  return readDirResult.match(
    (allPaths) =>
      // Keep only visible .mdx files, resolved back to full paths.
      allPaths
        .filter((relativePath) => !isHiddenFile(relativePath) && relativePath.endsWith('.mdx'))
        .map((relativePath) => join(dir, relativePath)),
    (error) => {
      // Record the failure but let the caller continue with whatever loaded.
      ;(multiError.current ??= new MultiError('Failed to load some guides:')).appendError(
        `Failed to read directory ${dir}: ${extractMessageFromAnyError(error)}`,
        error
      )
      return []
    }
  )
}
/**
 * Node.js-specific loader for GuideModel instances from the filesystem.
 * This class contains all the filesystem operations that require Node.js capabilities.
 */
export class GuideModelLoader {
  /**
   * Creates a GuideModel instance by loading and processing a markdown file from the filesystem.
   *
   * @param relPath - Relative path to the markdown file within the guides directory (e.g., "auth/users.mdx")
   * @returns A Result containing either the processed GuideModel or an error message
   *
   * @example
   * ```typescript
   * const result = await GuideModelLoader.fromFs('auth/users.mdx')
   * result.match(
   *   (guide) => console.log(guide.title, guide.subsections.length),
   *   (error) => console.error(error)
   * )
   * ```
   */
  static async fromFs(relPath: string): Promise<Result<GuideModel, Error>> {
    return Result.tryCatch(
      async () => {
        // Read the markdown file from the guides directory
        const filePath = join(GUIDES_DIRECTORY, relPath)
        const fileContent = await fs.readFile(filePath, 'utf-8')

        // Parse frontmatter using gray-matter
        const { data: metadata, content: rawContent } = matter(fileContent)

        // Replace partials and code samples using directives
        const processedContent = await preprocessMdxWithDefaults(rawContent)

        // Process MDX to get chunked sections for embedding. Keep the checksum:
        // GuideModel carries it and downstream consumers (e.g. the embeddings
        // generator via MarkdownLoader) compare it to skip unchanged content.
        const { checksum, sections } = await processMdx(processedContent)

        // Create subsections from the chunked sections
        const subsections = sections.map((section) => ({
          title: section.heading,
          href: section.slug,
          content: section.content,
        }))

        // Extract title from metadata or first heading
        const title = metadata.title || sections.find((s) => s.heading)?.heading

        // Create href from relative path (remove .mdx extension)
        const href = `https://supabase.com/docs/guides/${relPath.replace(/\.mdx?$/, '')}`

        return new GuideModel({
          title,
          href,
          checksum,
          content: processedContent,
          metadata,
          subsections,
        })
      },
      (error) => {
        if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
          // Include the requested path so the FileNotFound message is traceable
          // (an empty message would render as just "FileNotFound: ").
          return new FileNotFoundError(relPath, error)
        }
        return new Error(
          `Failed to load guide from ${relPath}: ${extractMessageFromAnyError(error)}`,
          {
            cause: error,
          }
        )
      }
    )
  }

  /**
   * Loads GuideModels from a list of file paths in parallel, collecting any
   * errors without stopping.
   */
  private static async loadGuides(
    filePaths: Array<string>,
    multiError: { current: MultiError | null }
  ): Promise<Array<GuideModel>> {
    const loadPromises = filePaths.map(async (filePath) => {
      const relPath = relative(GUIDES_DIRECTORY, filePath)
      return this.fromFs(relPath)
    })
    const results = await Promise.all(loadPromises)

    const guides: Array<GuideModel> = []
    results.forEach((result, index) => {
      const relPath = relative(GUIDES_DIRECTORY, filePaths[index])
      result.match(
        (guide) => guides.push(guide),
        (error) => {
          // Accumulate per-file failures so one bad guide doesn't abort the rest.
          ;(multiError.current ??= new MultiError('Failed to load some guides:')).appendError(
            `Failed to load ${relPath}: ${extractMessageFromAnyError(error)}`,
            error
          )
        }
      )
    })
    return guides
  }

  /**
   * Loads all guide models from the filesystem by walking the content directory.
   *
   * This method recursively walks the guides directory (or a specific section
   * subdirectory) and loads all non-hidden .mdx files.
   *
   * If errors occur while loading individual files, they are collected but
   * don't prevent other files from loading.
   *
   * @param section - Optional section name to limit walking to a specific
   * subdirectory (e.g., "database", "auth")
   * @returns A Both containing [successful GuideModels, MultiError with all
   * failures or null if no errors]
   *
   * @example
   * ```typescript
   * // Load all guides
   * const guides = (await GuideModelLoader.allFromFs()).unwrapLeft()
   *
   * // Load only database guides
   * const dbGuides = (await GuideModelLoader.allFromFs('database')).unwrapLeft()
   * ```
   */
  static async allFromFs(section?: string): Promise<Both<Array<GuideModel>, MultiError | null>> {
    const searchDir = section ? join(GUIDES_DIRECTORY, section) : GUIDES_DIRECTORY
    const multiError = { current: null as MultiError | null }

    // Get all .mdx files in the search directory
    const mdxFiles = await walkMdxFiles(searchDir, multiError)

    // Load each file and collect results
    const guides = await this.loadGuides(mdxFiles, multiError)

    return new Both(guides, multiError.current)
  }
}
+18 -106
View File
@@ -1,17 +1,12 @@
import { createHash } from 'node:crypto'
import { ObjectExpression } from 'estree'
import GithubSlugger from 'github-slugger'
import matter from 'gray-matter'
import { type Content, type Root } from 'mdast'
import { fromMarkdown } from 'mdast-util-from-markdown'
import { toMarkdown } from 'mdast-util-to-markdown'
import { mdxFromMarkdown, type MdxjsEsm } from 'mdast-util-mdx'
import { mdxFromMarkdown, mdxToMarkdown } from 'mdast-util-mdx'
import { toString } from 'mdast-util-to-string'
import { mdxjs } from 'micromark-extension-mdxjs'
import { u } from 'unist-builder'
import { filter } from 'unist-util-filter'
type Json = Record<string, string | number | boolean | null | Json[] | { [key: string]: Json }>
type Section = {
content: string
@@ -21,17 +16,25 @@ type Section = {
export type ProcessedMdx = {
checksum: string
meta: Json
meta: Record<string, unknown>
sections: Section[]
}
/**
 * Computes the SHA-256 hash of a string, returned as lowercase hex.
 *
 * Uses the Web Crypto API (`crypto.subtle`) rather than `node:crypto`, so it
 * works in non-Node runtimes as well.
 */
async function createHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content)
  const digest = await crypto.subtle.digest('SHA-256', bytes)
  // Fold each byte into a two-character hex string.
  let hex = ''
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, '0')
  }
  return hex
}
/**
* Process MDX content.
*
* Splits MDX content into sections based on headings, and calculates checksum.
*/
function processMdx(content: string, options?: { yaml?: boolean }): ProcessedMdx {
const checksum = createHash('sha256').update(content).digest('base64')
async function processMdx(content: string, options?: { yaml?: boolean }): Promise<ProcessedMdx> {
const checksum = await createHash(content)
let frontmatter: Record<string, unknown> = {}
if (options?.yaml) {
@@ -45,43 +48,15 @@ function processMdx(content: string, options?: { yaml?: boolean }): ProcessedMdx
mdastExtensions: [mdxFromMarkdown()],
})
let meta: Record<string, unknown> | undefined
if (options?.yaml) {
meta = frontmatter
} else {
meta = extractMetaExport(mdxTree)
}
const serializableMeta: Json = meta && JSON.parse(JSON.stringify(meta))
// Remove all MDX elements from markdown
const mdTree = filter(
mdxTree,
(node) =>
![
'mdxjsEsm',
'mdxJsxFlowElement',
'mdxJsxTextElement',
'mdxFlowExpression',
'mdxTextExpression',
].includes(node.type)
)
if (!mdTree) {
return {
checksum,
meta: serializableMeta,
sections: [],
}
}
const sectionTrees = splitTreeBy(mdTree, (node) => node.type === 'heading')
const sectionTrees = splitTreeBy(mdxTree, (node) => node.type === 'heading')
const slugger = new GithubSlugger()
const sections = sectionTrees.map((tree) => {
const [firstNode] = tree.children
const content = toMarkdown(tree)
const content = toMarkdown(tree, {
extensions: [mdxToMarkdown()],
})
const rawHeading: string | undefined =
firstNode.type === 'heading' ? toString(firstNode) : undefined
@@ -103,74 +78,11 @@ function processMdx(content: string, options?: { yaml?: boolean }): ProcessedMdx
return {
checksum,
meta: serializableMeta,
sections,
meta: frontmatter,
}
}
/**
* Extracts the `meta` ESM export from the MDX file.
*
* This info is akin to frontmatter.
*/
function extractMetaExport(mdxTree: Root) {
const metaExportNode = mdxTree.children.find((node): node is MdxjsEsm => {
return (
node.type === 'mdxjsEsm' &&
node.data?.estree?.body[0]?.type === 'ExportNamedDeclaration' &&
node.data.estree.body[0].declaration?.type === 'VariableDeclaration' &&
node.data.estree.body[0].declaration.declarations[0]?.id.type === 'Identifier' &&
node.data.estree.body[0].declaration.declarations[0].id.name === 'meta'
)
})
if (!metaExportNode) {
return undefined
}
const objectExpression =
(metaExportNode.data?.estree?.body[0]?.type === 'ExportNamedDeclaration' &&
metaExportNode.data.estree.body[0].declaration?.type === 'VariableDeclaration' &&
metaExportNode.data.estree.body[0].declaration.declarations[0]?.id.type === 'Identifier' &&
metaExportNode.data.estree.body[0].declaration.declarations[0].id.name === 'meta' &&
metaExportNode.data.estree.body[0].declaration.declarations[0].init?.type ===
'ObjectExpression' &&
metaExportNode.data.estree.body[0].declaration.declarations[0].init) ||
undefined
if (!objectExpression) {
return undefined
}
return getObjectFromExpression(objectExpression)
}
/**
* Extracts ES literals from an `estree` `ObjectExpression`
* into a plain JavaScript object.
*/
function getObjectFromExpression(node: ObjectExpression) {
return node.properties.reduce<
Record<string, string | number | bigint | true | RegExp | undefined>
>((object, property) => {
if (property.type !== 'Property') {
return object
}
const key = (property.key.type === 'Identifier' && property.key.name) || undefined
const value = (property.value.type === 'Literal' && property.value.value) || undefined
if (!key) {
return object
}
return {
...object,
[key]: value,
}
}, {})
}
/**
* Splits a `mdast` tree into multiple trees based on
* a predicate function. Will include the splitting node
@@ -210,4 +122,4 @@ function parseHeading(heading: string): { heading: string; customAnchor?: string
}
export { processMdx }
export type { Json, Section }
export type { Section }
@@ -4,7 +4,7 @@ import { createClient } from '@supabase/supabase-js'
import { parseArgs } from 'node:util'
import { OpenAI } from 'openai'
import { v4 as uuidv4 } from 'uuid'
import type { Json, Section } from '../helpers.mdx.js'
import type { Section } from '../helpers.mdx.js'
import { fetchAllSources } from './sources/index.js'
const args = parseArgs({
@@ -78,8 +78,8 @@ async function generateEmbeddings() {
checksum: string
sections: Section[]
ragIgnore?: boolean
meta?: Json
} = embeddingSource.process()
meta?: Record<string, unknown>
} = await embeddingSource.process()
// Check for existing page in DB and compare checksums
const { error: fetchPageError, data: existingPage } = await supabaseClient
+8 -3
View File
@@ -1,4 +1,4 @@
import type { Json, Section } from '../../helpers.mdx.js'
import type { Section } from '../../helpers.mdx.js'
export abstract class BaseLoader {
type: string
@@ -14,7 +14,7 @@ export abstract class BaseLoader {
export abstract class BaseSource {
type: string
checksum?: string
meta?: Json
meta?: Record<string, unknown>
sections?: Section[]
constructor(
@@ -22,7 +22,12 @@ export abstract class BaseSource {
public path: string
) {}
abstract process(): { checksum: string; meta?: Json; ragIgnore?: boolean; sections: Section[] }
abstract process(): Promise<{
checksum: string
meta?: Record<string, unknown>
ragIgnore?: boolean
sections: Section[]
}>
abstract extractIndexedContent(): string
}
@@ -104,7 +104,7 @@ export class GitHubDiscussionSource extends BaseSource {
super(source, path)
}
process() {
async process() {
const { id, title, updatedAt, body, databaseId } = this.discussion
const checksum = createHash('sha256').update(updatedAt).digest('base64')
@@ -92,7 +92,7 @@ export class LintWarningsGuideSource extends BaseSource {
super(source, path)
}
process() {
async process() {
this.checksum = createHash('sha256').update(this.lint.content).digest('base64')
this.meta = {
+36 -12
View File
@@ -1,5 +1,5 @@
import { readFile } from 'fs/promises'
import { processMdx } from '../../helpers.mdx.js'
import { SubsectionModel } from '../../../resources/guide/guideModel.js'
import { GuideModelLoader } from '../../../resources/guide/guideModelLoader.js'
import { BaseLoader, BaseSource } from './base.js'
export class MarkdownLoader extends BaseLoader {
@@ -15,8 +15,22 @@ export class MarkdownLoader extends BaseLoader {
}
async load() {
const contents = await readFile(this.filePath, 'utf8')
return [new MarkdownSource(this.source, this.path, contents, this.options)]
const guide = (
await GuideModelLoader.fromFs(this.filePath.replace(/^content\/guides/, ''))
).unwrap()
return [
new MarkdownSource(
this.source,
this.path,
guide.content ?? '',
{
checksum: guide.checksum,
meta: guide.metadata,
sections: guide.subsections,
},
this.options
),
]
}
}
@@ -27,19 +41,29 @@ export class MarkdownSource extends BaseSource {
source: string,
path: string,
public contents: string,
{
checksum,
meta,
sections,
}: { checksum?: string; meta?: Record<string, unknown>; sections: Array<SubsectionModel> },
public options?: { yaml?: boolean }
) {
super(source, path)
this.checksum = checksum
this.meta = meta ?? {}
this.sections = sections.map((section) => ({
content: section.content ?? '',
heading: section.title,
slug: section.href,
}))
}
process() {
const { checksum, meta, sections } = processMdx(this.contents, this.options)
this.checksum = checksum
this.meta = meta
this.sections = sections
return { checksum, meta, sections }
async process() {
return {
checksum: this.checksum ?? '',
meta: this.meta,
sections: this.sections ?? [],
}
}
extractIndexedContent(): string {
@@ -57,8 +57,8 @@ export class IntegrationSource extends BaseSource {
super(source, path)
}
process() {
const { checksum, sections } = processMdx(this.partnerData.overview)
async process() {
const { checksum, sections } = await processMdx(this.partnerData.overview)
const meta = {
title: upperFirst(this.partnerData.slug),
subtitle: 'Integration',
@@ -12,7 +12,6 @@ import { getApiEndpointById } from '../../../features/docs/Reference.generated.s
import type { CliCommand, CliSpec } from '../../../generator/types/CliSpec.js'
import { flattenSections } from '../../../lib/helpers.js'
import { enrichedOperation, gen_v3 } from '../../../lib/refGenerator/helpers.js'
import type { Json } from '../../helpers.mdx.js'
import { BaseLoader, BaseSource } from './base.js'
export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
@@ -24,7 +23,7 @@ export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
constructor(
source: string,
path: string,
public meta: Json,
public meta: Record<string, unknown>,
public specFilePath: string,
public sectionsFilePath: string
) {
@@ -68,7 +67,7 @@ export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
specSections: SpecSection[],
id: string
): SpecSection | undefined | Promise<SpecSection | undefined>
enhanceMeta(_section: SpecSection): Json {
enhanceMeta(_section: SpecSection): Record<string, unknown> {
return this.meta
}
}
@@ -81,12 +80,12 @@ export abstract class ReferenceSource<SpecSection> extends BaseSource {
path: string,
public refSection: ICommonSection,
public specSection: SpecSection,
public meta: Json
public meta: Record<string, unknown>
) {
super(source, path)
}
process() {
async process() {
const checksum = createHash('sha256')
.update(JSON.stringify(this.refSection) + JSON.stringify(this.specSection))
.digest('base64')
@@ -125,7 +124,7 @@ export class OpenApiReferenceLoader extends ReferenceLoader<Partial<enrichedOper
constructor(
source: string,
path: string,
meta: Json,
meta: Record<string, unknown>,
specFilePath: string,
sectionsFilePath: string
) {
@@ -251,7 +250,7 @@ export class ClientLibReferenceLoader extends ReferenceLoader<IFunctionDefinitio
constructor(
source: string,
path: string,
meta: Json,
meta: Record<string, unknown>,
specFilePath: string,
sectionsFilePath: string
) {
@@ -272,7 +271,7 @@ export class ClientLibReferenceLoader extends ReferenceLoader<IFunctionDefinitio
return functionDefinitions.find((functionDefinition) => functionDefinition.id === id)
}
enhanceMeta(section: IFunctionDefinition): Json {
enhanceMeta(section: IFunctionDefinition): Record<string, unknown> {
return { ...this.meta, slug: section.id, methodName: section.title }
}
}
@@ -311,7 +310,7 @@ export class CliReferenceLoader extends ReferenceLoader<CliCommand> {
constructor(
source: string,
path: string,
meta: Json,
meta: Record<string, unknown>,
specFilePath: string,
sectionsFilePath: string
) {
+3 -1
View File
@@ -624,6 +624,9 @@ importers:
'@types/unist':
specifier: ^2.0.6
version: 2.0.8
'@types/uuid':
specifier: ^10.0.0
version: 10.0.0
api-types:
specifier: workspace:*
version: link:../../packages/api-types
@@ -10921,7 +10924,6 @@ packages:
resolution: {integrity: sha512-t0q23FIpvHDTtnORW+bDJziGsal5uh9RJTJ1fyH8drd4lICOoXhJ5pLMUZ5C0VQei6dNmwTzzoTRgMkO9JgHEQ==}
peerDependencies:
eslint: '>= 5'
bundledDependencies: []
eslint-plugin-import@2.31.0:
resolution: {integrity: sha512-ixmkI62Rbc2/w8Vfxyh1jQRTdRTF52VxwRVHl/ykPAmqG+Nb7/kNn+byLP0LxPgI7zWA16Jt82SybJInmMia3A==}