mirror of
https://github.com/supabase/supabase.git
synced 2026-05-06 08:56:46 -04:00
refactor: reading markdown docs files (#37774)
* refactor: reading markdown docs files
Refactor how Markdown docs files are read:
- Reuses the same logic across search index generation & page generation
- Improves the indexed content for search:
- Stops removing MDX components, which often contain useful
information like Admonitions
- Denormalizes Partials and CodeSamples for more complete content
This is a prerequisite step for implementing the "Copy docs as Markdown"
functionality.
Only touches regular guides for now, not federated ones.
* fix: tailwind build error (#37728)
We switched to ESM imports by default a while ago, which means local
builds are now breaking because the Tailwind config uses `require`. Changed
the Tailwind config file to CJS. (I have no idea how this has been working
on Vercel all this time.)
* style: prettier
This commit is contained in:
@@ -4,7 +4,9 @@ import { type OpenAIClientInterface } from '~/lib/openAi'
|
||||
import { ApiError } from '../../utils'
|
||||
import { POST } from '../route'
|
||||
|
||||
const contentEmbeddingMock = vi.fn().mockImplementation(async () => Result.ok([0.1, 0.2, 0.3]))
|
||||
// Stand-in for `createContentEmbedding`. The resolved payload uses the same
// keys as `EmbeddingWithTokens` (`embedding`, `token_count`) so the mock has
// the shape the real client returns.
const contentEmbeddingMock = vi
  .fn()
  .mockImplementation(async () => Result.ok({ embedding: [0.1, 0.2, 0.3], token_count: 10 }))
|
||||
const openAIMock: OpenAIClientInterface = {
|
||||
createContentEmbedding: contentEmbeddingMock,
|
||||
}
|
||||
|
||||
@@ -64,6 +64,16 @@ export class NoDataError<Details extends ObjectOrNever = never> extends ApiError
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Error raised when a file that was expected on disk could not be found.
 *
 * The message is always prefixed with `FileNotFound: ` and the originating
 * error is attached as the standard `cause`.
 */
export class FileNotFoundError<Details extends ObjectOrNever = never> extends Error {
  /** Optional structured context supplied by the caller. */
  public details?: Details

  /**
   * @param message - Text appended after the `FileNotFound: ` prefix
   * @param error - Underlying error, stored as `cause`
   * @param details - Optional extra context kept on the instance
   */
  constructor(message: string, error: Error, details?: Details) {
    super(`FileNotFound: ${message}`, { cause: error })
    this.details = details
  }
}
|
||||
|
||||
export class MultiError<ErrorType = unknown, Details extends ObjectOrNever = never> extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
@@ -79,7 +89,7 @@ export class MultiError<ErrorType = unknown, Details extends ObjectOrNever = nev
|
||||
|
||||
appendError(message: string, error: ErrorType): this {
|
||||
this.message = `${this.message}\n\t${message}`
|
||||
;((this.cause ?? (this.cause = [])) as Array<ErrorType>).push(error)
|
||||
;((this.cause ??= []) as Array<ErrorType>).push(error)
|
||||
return this
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
import matter from 'gray-matter'
|
||||
import * as Sentry from '@sentry/nextjs'
|
||||
import { fromMarkdown } from 'mdast-util-from-markdown'
|
||||
import { gfmFromMarkdown } from 'mdast-util-gfm'
|
||||
import { gfm } from 'micromark-extension-gfm'
|
||||
import { type Metadata, type ResolvingMetadata } from 'next'
|
||||
import { notFound } from 'next/navigation'
|
||||
import { readFile, readdir } from 'node:fs/promises'
|
||||
import { extname, join, sep } from 'node:path'
|
||||
import { readdir } from 'node:fs/promises'
|
||||
import { extname, join, relative, sep } from 'node:path'
|
||||
|
||||
import { extractMessageFromAnyError, FileNotFoundError } from '~/app/api/utils'
|
||||
import { pluckPromise } from '~/features/helpers.fn'
|
||||
import { cache_fullProcess_withDevCacheBust, existsFile } from '~/features/helpers.fs'
|
||||
import type { OrPromise } from '~/features/helpers.types'
|
||||
import { generateOpenGraphImageMeta } from '~/features/seo/openGraph'
|
||||
import { BASE_PATH } from '~/lib/constants'
|
||||
import { GUIDES_DIRECTORY, isValidGuideFrontmatter, type GuideFrontmatter } from '~/lib/docs'
|
||||
import { GuideModelLoader } from '~/resources/guide/guideModelLoader'
|
||||
import { newEditLink } from './GuidesMdx.template'
|
||||
|
||||
const PUBLISHED_SECTIONS = [
|
||||
@@ -51,31 +53,40 @@ const getGuidesMarkdownInternal = async (slug: string[]) => {
|
||||
notFound()
|
||||
}
|
||||
|
||||
let mdx: string
|
||||
try {
|
||||
mdx = await readFile(fullPath, 'utf-8')
|
||||
} catch {
|
||||
// Not using console.error because this includes pages that are genuine
|
||||
// 404s and clutters up the logs
|
||||
console.log('Error reading Markdown at path: %s', fullPath)
|
||||
const guide = (await GuideModelLoader.fromFs(relative(GUIDES_DIRECTORY, fullPath))).unwrap()
|
||||
const content = guide.content ?? ''
|
||||
const meta = guide.metadata ?? {}
|
||||
|
||||
if (!isValidGuideFrontmatter(meta)) {
|
||||
throw Error(`Type of frontmatter is not valid for path: ${fullPath}`)
|
||||
}
|
||||
|
||||
const editLink = newEditLink(
|
||||
`supabase/supabase/blob/master/apps/docs/content/guides/${relPath}.mdx`
|
||||
)
|
||||
|
||||
return {
|
||||
pathname: `/guides/${slug.join('/')}` satisfies `/${string}`,
|
||||
meta,
|
||||
content,
|
||||
editLink,
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
if (error instanceof FileNotFoundError) {
|
||||
// Not using console.error because this includes pages that are genuine
|
||||
// 404s and clutters up the logs
|
||||
console.log('Could not read Markdown at path: %s', fullPath)
|
||||
} else {
|
||||
console.error(
|
||||
'Error processing Markdown file at path: %s:\n\t%s',
|
||||
fullPath,
|
||||
extractMessageFromAnyError(error)
|
||||
)
|
||||
Sentry.captureException(error)
|
||||
}
|
||||
notFound()
|
||||
}
|
||||
|
||||
const editLink = newEditLink(
|
||||
`supabase/supabase/blob/master/apps/docs/content/guides/${relPath}.mdx`
|
||||
)
|
||||
|
||||
const { data: meta, content } = matter(mdx)
|
||||
if (!isValidGuideFrontmatter(meta)) {
|
||||
throw Error('Type of frontmatter is not valid')
|
||||
}
|
||||
|
||||
return {
|
||||
pathname: `/guides/${slug.join('/')}` satisfies `/${string}`,
|
||||
meta,
|
||||
content,
|
||||
editLink,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -113,6 +113,11 @@ export class Result<Ok, Error> {
|
||||
return this as unknown as Result<Mapped, Error>
|
||||
}
|
||||
|
||||
/**
 * Asynchronous counterpart of `map`.
 *
 * When this result is Ok, awaits `fn` on the contained data and wraps the
 * outcome in a new Ok result. Otherwise the current result is returned as-is
 * (only re-typed), and `fn` is never called.
 */
async mapAsync<Mapped>(fn: (data: Ok) => Promise<Mapped>): Promise<Result<Mapped, Error>> {
  if (!this.isOk()) {
    return this as unknown as Result<Mapped, Error>
  }

  const mapped = await fn(this.internal.data!)
  return Result.ok(mapped)
}
|
||||
|
||||
mapError<MappedError>(fn: (error: Error) => MappedError): Result<Ok, MappedError> {
|
||||
if (this.isOk()) return this as unknown as Result<Ok, MappedError>
|
||||
return Result.error(fn(this.internal.error!))
|
||||
@@ -147,6 +152,27 @@ export class Result<Ok, Error> {
|
||||
return this.internal.data!
|
||||
}
|
||||
|
||||
/**
 * Returns the contained data when this result is Ok; otherwise calls `deflt`
 * and returns its value. `deflt` is only invoked on the non-Ok path.
 */
unwrapOr(deflt: () => Ok): Ok {
  return this.isOk() ? this.internal.data! : deflt()
}
|
||||
|
||||
/**
 * Returns the contained error.
 *
 * @throws if this result is Ok, since there is no error to return
 */
unwrapError(): Error {
  if (!this.isOk()) return this.internal.error!

  throw new Error(`UnwrapError called on Ok`)
}
|
||||
|
||||
/**
 * Non-throwing accessor for the stored error; per the return type the value
 * may be `null`.
 */
unwrapErrorSafe(): Error | null {
  const { error } = this.internal
  return error
}
|
||||
|
||||
/**
 * Returns whichever side is present: the data (via `unwrap`) when this result
 * is Ok, the error (via `unwrapError`) otherwise.
 */
unwrapEither(): Ok | Error {
  return this.isOk() ? this.unwrap() : this.unwrapError()
}
|
||||
|
||||
join<OtherOk, OtherError>(
|
||||
other: Result<OtherOk, OtherError>
|
||||
): Result<[Ok, OtherOk], [Error, OtherError]> {
|
||||
@@ -158,3 +184,47 @@ export class Result<Ok, Error> {
|
||||
return Result.ok([this.internal.data!, other.internal.data!])
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A pair that carries a left and a right value at the same time.
 *
 * The mapping methods return a new `Both` instead of modifying the receiver.
 */
export class Both<Left, Right> {
  private internal: {
    left: Left
    right: Right
  }

  constructor(left: Left, right: Right) {
    this.internal = { left, right }
  }

  /** Returns a new `Both` whose left value is `fn(left)`; the right value is carried over. */
  mapLeft<NewLeft>(fn: (left: Left) => NewLeft): Both<NewLeft, Right> {
    const { left, right } = this.internal
    return new Both(fn(left), right)
  }

  /** Returns a new `Both` whose right value is `fn(right)`; the left value is carried over. */
  mapRight<NewRight>(fn: (right: Right) => NewRight): Both<Left, NewRight> {
    const { left, right } = this.internal
    return new Both(left, fn(right))
  }

  /** Like `mapLeft`, but awaits an asynchronous mapper before building the new pair. */
  async mapLeftAsync<NewLeft>(fn: (left: Left) => Promise<NewLeft>): Promise<Both<NewLeft, Right>> {
    const mappedLeft = await fn(this.internal.left)
    return new Both(mappedLeft, this.internal.right)
  }

  /** Returns the left value. */
  unwrapLeft(): Left {
    const { left } = this.internal
    return left
  }

  /** Returns the right value. */
  unwrapRight(): Right {
    const { right } = this.internal
    return right
  }

  /** Passes both values to `fn` and returns whatever it produces. */
  combine<Output>(fn: (left: Left, right: Right) => Output): Output {
    const { left, right } = this.internal
    return fn(left, right)
  }

  /**
   * Collapses the pair into a `Result`.
   *
   * A truthy right value becomes the error; any falsy right value (for
   * example `null`) is treated as "no error" and the left value becomes the
   * Ok data.
   */
  intoResult(): Result<Left, Right> {
    const { left, right } = this.internal
    return right ? Result.error(right) : Result.ok(left)
  }
}
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
type OrPromise<T> = T | Promise<T>
|
||||
export type Json = string | number | boolean | { [key: string]: Json } | Json[]
|
||||
|
||||
type Json = string | number | boolean | { [key: string]: Json } | Json[]
|
||||
export type OrPromise<T> = T | Promise<T>
|
||||
|
||||
type WithRequired<T, K extends keyof T> = T & { [P in K]-?: T[P] }
|
||||
|
||||
export type { Json, OrPromise, WithRequired }
|
||||
export type WithRequired<T, K extends keyof T> = T & { [P in K]-?: T[P] }
|
||||
|
||||
+16
-4
@@ -10,13 +10,18 @@ import { Result } from '~/features/helpers.fn'
|
||||
|
||||
type Embedding = Array<number>
|
||||
|
||||
export interface EmbeddingWithTokens {
|
||||
embedding: Embedding
|
||||
token_count: number
|
||||
}
|
||||
|
||||
interface ModerationFlaggedDetails {
|
||||
flagged: boolean
|
||||
categories: OpenAI.Moderations.Moderation.Categories
|
||||
}
|
||||
|
||||
export interface OpenAIClientInterface {
|
||||
createContentEmbedding(text: string): Promise<Result<Embedding, ApiErrorGeneric>>
|
||||
createContentEmbedding(text: string): Promise<Result<EmbeddingWithTokens, ApiErrorGeneric>>
|
||||
}
|
||||
|
||||
let openAIClient: OpenAIClientInterface | null
|
||||
@@ -26,7 +31,9 @@ class OpenAIClient implements OpenAIClientInterface {
|
||||
|
||||
constructor(private client: OpenAI) {}
|
||||
|
||||
async createContentEmbedding(text: string): Promise<Result<Embedding, ApiErrorGeneric>> {
|
||||
async createContentEmbedding(
|
||||
text: string
|
||||
): Promise<Result<EmbeddingWithTokens, ApiErrorGeneric>> {
|
||||
return await Result.tryCatchFlat(
|
||||
this.createContentEmbeddingImpl.bind(this),
|
||||
convertUnknownToApiError,
|
||||
@@ -36,7 +43,7 @@ class OpenAIClient implements OpenAIClientInterface {
|
||||
|
||||
private async createContentEmbeddingImpl(
|
||||
text: string
|
||||
): Promise<Result<Embedding, ApiError<ModerationFlaggedDetails>>> {
|
||||
): Promise<Result<EmbeddingWithTokens, ApiError<ModerationFlaggedDetails>>> {
|
||||
const query = text.trim()
|
||||
|
||||
const moderationResponse = await this.client.moderations.create({ input: query })
|
||||
@@ -55,7 +62,12 @@ class OpenAIClient implements OpenAIClientInterface {
|
||||
input: query,
|
||||
})
|
||||
const [{ embedding: queryEmbedding }] = embeddingsResponse.data
|
||||
return Result.ok(queryEmbedding)
|
||||
const tokenCount = embeddingsResponse.usage.total_tokens
|
||||
|
||||
return Result.ok({
|
||||
embedding: queryEmbedding,
|
||||
token_count: tokenCount,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,32 @@
|
||||
import { createClient, type SupabaseClient } from '@supabase/supabase-js'
|
||||
import { type Database as DatabaseGenerated } from 'common'
|
||||
|
||||
type Database = {
|
||||
export type Database = {
|
||||
content: DatabaseGenerated['content']
|
||||
graphql_public: DatabaseGenerated['graphql_public']
|
||||
public: {
|
||||
Tables: DatabaseGenerated['public']['Tables']
|
||||
Tables: Omit<DatabaseGenerated['public']['Tables'], 'page_section'> & {
|
||||
page_section: Omit<
|
||||
DatabaseGenerated['public']['Tables']['page_section'],
|
||||
'Row' | 'Insert' | 'Update'
|
||||
> & {
|
||||
Row: Omit<DatabaseGenerated['public']['Tables']['page_section']['Row'], 'embedding'> & {
|
||||
embedding: Array<number> | null
|
||||
}
|
||||
Insert: Omit<
|
||||
DatabaseGenerated['public']['Tables']['page_section']['Insert'],
|
||||
'embedding'
|
||||
> & {
|
||||
embedding?: Array<number> | null
|
||||
}
|
||||
Update: Omit<
|
||||
DatabaseGenerated['public']['Tables']['page_section']['Update'],
|
||||
'embedding'
|
||||
> & {
|
||||
embedding?: Array<number> | null
|
||||
}
|
||||
}
|
||||
}
|
||||
Views: DatabaseGenerated['public']['Views']
|
||||
Functions: Omit<
|
||||
DatabaseGenerated['public']['Functions'],
|
||||
|
||||
@@ -2,7 +2,7 @@ import 'server-only'
|
||||
|
||||
import { createClient, type SupabaseClient } from '@supabase/supabase-js'
|
||||
|
||||
import { type Database } from 'common'
|
||||
import { type Database } from '~/lib/supabase'
|
||||
|
||||
let supabaseAdminClient: SupabaseClient<Database> | null = null
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"scripts": {
|
||||
"build": "next build",
|
||||
"build:analyze": "ANALYZE=true next build",
|
||||
"build:llms": "tsx ./scripts/llms.ts",
|
||||
"build:llms": "tsx --conditions=react-server ./scripts/llms.ts",
|
||||
"build:sitemap": "tsx ./internals/generate-sitemap.ts",
|
||||
"clean": "rimraf .next .turbo node_modules features/docs/generated examples __generated__",
|
||||
"codegen:examples": "shx cp -r ../../examples ./examples",
|
||||
@@ -16,7 +16,7 @@
|
||||
"dev": "concurrently --kill-others \"next dev --port 3001\" \"pnpm run dev:watch:troubleshooting\"",
|
||||
"dev:secrets:pull": "AWS_PROFILE=supabase-dev node ../../scripts/getSecrets.js -n local/docs",
|
||||
"dev:watch:troubleshooting": "node ./scripts/troubleshooting/watch.mjs",
|
||||
"embeddings": "tsx scripts/search/generate-embeddings.ts",
|
||||
"embeddings": "tsx --conditions=react-server scripts/search/generate-embeddings.ts",
|
||||
"embeddings:refresh": "pnpm run embeddings --refresh",
|
||||
"last-changed": "tsx scripts/last-changed.ts",
|
||||
"last-changed:reset": "pnpm run last-changed -- --reset",
|
||||
@@ -138,6 +138,7 @@
|
||||
"@types/react-copy-to-clipboard": "^5.0.4",
|
||||
"@types/react-dom": "catalog:",
|
||||
"@types/unist": "^2.0.6",
|
||||
"@types/uuid": "^10.0.0",
|
||||
"api-types": "workspace:*",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"chokidar": "^4.0.3",
|
||||
|
||||
@@ -22,7 +22,7 @@ export abstract class SearchResultModel {
|
||||
const includeFullContent = requestedFields.includes('content')
|
||||
const embeddingResult = await openAI().createContentEmbedding(query)
|
||||
|
||||
return embeddingResult.flatMapAsync(async (embedding) => {
|
||||
return embeddingResult.flatMapAsync(async ({ embedding }) => {
|
||||
const matchResult = new Result(
|
||||
await supabase().rpc('search_content', {
|
||||
embedding,
|
||||
@@ -49,7 +49,7 @@ export abstract class SearchResultModel {
|
||||
const includeFullContent = requestedFields.includes('content')
|
||||
const embeddingResult = await openAI().createContentEmbedding(query)
|
||||
|
||||
return embeddingResult.flatMapAsync(async (embedding) => {
|
||||
return embeddingResult.flatMapAsync(async ({ embedding }) => {
|
||||
const matchResult = new Result(
|
||||
await supabase().rpc('search_content_hybrid', {
|
||||
query_text: query,
|
||||
|
||||
@@ -3,23 +3,31 @@ import { type SearchResultInterface } from '../globalSearch/globalSearchInterfac
|
||||
/**
 * In-memory representation of a guide: optional descriptive fields plus the
 * list of subsections it is made of.
 */
export class GuideModel implements SearchResultInterface {
  public title?: string
  public href?: string
  public checksum?: string
  public content?: string
  public metadata?: Record<string, unknown>
  public subsections: Array<SubsectionModel>

  /**
   * @param init - Field values for the guide. Every field is optional; when
   *   `subsections` is omitted the guide gets an empty subsection list,
   *   otherwise each entry is wrapped in a `SubsectionModel`.
   */
  constructor(init: {
    title?: string
    href?: string
    checksum?: string
    content?: string
    metadata?: Record<string, unknown>
    subsections?: Array<{ title?: string; href?: string; content?: string }>
  }) {
    this.title = init.title
    this.href = init.href
    this.checksum = init.checksum
    this.content = init.content
    this.metadata = init.metadata

    const rawSubsections = init.subsections
    this.subsections =
      rawSubsections == null ? [] : rawSubsections.map((raw) => new SubsectionModel(raw))
  }
}
|
||||
|
||||
@@ -0,0 +1,201 @@
|
||||
import matter from 'gray-matter'
|
||||
import { promises as fs } from 'node:fs'
|
||||
import { join, relative } from 'node:path'
|
||||
|
||||
import { extractMessageFromAnyError, FileNotFoundError, MultiError } from '~/app/api/utils'
|
||||
import { preprocessMdxWithDefaults } from '~/features/directives/utils'
|
||||
import { Both, Result } from '~/features/helpers.fn'
|
||||
import { GUIDES_DIRECTORY } from '~/lib/docs'
|
||||
import { processMdx } from '~/scripts/helpers.mdx'
|
||||
import { GuideModel } from './guideModel'
|
||||
|
||||
/**
 * Determines if a file is hidden.
 *
 * A file is hidden if its name, or the name of any of its parent directories,
 * starts with an underscore.
 *
 * Both `/` and `\` are accepted as separators, so paths built with the
 * platform-native separator (for example on Windows) are checked segment by
 * segment as well.
 *
 * @param path - Path to inspect, split into segments on `/` and `\`
 * @returns `true` when any segment starts with `_`
 */
function isHiddenFile(path: string): boolean {
  return path.split(/[\\/]/).some((part) => part.startsWith('_'))
}
|
||||
|
||||
/**
 * Lists every non-hidden `.mdx` file found beneath `dir` (recursively).
 *
 * @param dir - Directory to scan
 * @param multiError - Shared error holder. If the directory cannot be read,
 *   the failure is appended to `multiError.current` (created on first use)
 *   and an empty list is returned instead of throwing.
 * @returns The matching entries, each joined onto `dir`
 */
async function walkMdxFiles(
  dir: string,
  multiError: { current: MultiError | null }
): Promise<Array<string>> {
  const listing = await Result.tryCatch(
    () => fs.readdir(dir, { recursive: true }),
    (error) => error
  )

  return listing.match(
    (entries) =>
      entries
        // Skip hidden entries first, then keep only .mdx files
        .filter((entry) => !isHiddenFile(entry) && entry.endsWith('.mdx'))
        .map((entry) => join(dir, entry)),
    (cause) => {
      // Record the unreadable directory and carry on with no files
      const collector = (multiError.current ??= new MultiError('Failed to load some guides:'))
      collector.appendError(
        `Failed to read directory ${dir}: ${extractMessageFromAnyError(cause)}`,
        cause
      )
      return []
    }
  )
}
|
||||
|
||||
/**
 * Node.js-specific loader for GuideModel instances from the filesystem.
 * This class contains all the filesystem operations that require Node.js capabilities.
 */
export class GuideModelLoader {
  /**
   * Creates a GuideModel instance by loading and processing a markdown file from the filesystem.
   *
   * The file is read relative to `GUIDES_DIRECTORY`, its frontmatter is split
   * off with gray-matter, the body is passed through
   * `preprocessMdxWithDefaults`, and the result is chunked with `processMdx`.
   * The checksum computed by `processMdx` is stored on the returned model.
   *
   * @param relPath - Relative path to the markdown file within the guides directory (e.g., "auth/users.mdx")
   * @returns A Result containing either the processed GuideModel or an Error.
   *   An `ENOENT` failure is reported as a `FileNotFoundError`; any other
   *   failure is wrapped in a plain `Error` with the original as `cause`.
   *
   * @example
   * ```typescript
   * const result = await GuideModelLoader.fromFs('auth/users.mdx')
   * result.match(
   *   (guide) => console.log(guide.title, guide.subsections.length),
   *   (error) => console.error(error)
   * )
   * ```
   */
  static async fromFs(relPath: string): Promise<Result<GuideModel, Error>> {
    return Result.tryCatch(
      async () => {
        // Read the markdown file from the guides directory
        const filePath = join(GUIDES_DIRECTORY, relPath)
        const fileContent = await fs.readFile(filePath, 'utf-8')

        // Parse frontmatter using gray-matter
        const { data: metadata, content: rawContent } = matter(fileContent)

        // Replace partials and code samples using directives
        const processedContent = await preprocessMdxWithDefaults(rawContent)

        // Process MDX to get chunked sections for embedding. The checksum is
        // kept so that consumers reading `guide.checksum` receive a value.
        const { checksum, sections } = await processMdx(processedContent)

        // Create subsections from the chunked sections
        const subsections = sections.map((section) => ({
          title: section.heading,
          href: section.slug,
          content: section.content,
        }))

        // Extract title from metadata or first heading
        const title = metadata.title || sections.find((s) => s.heading)?.heading

        // Create href from relative path: use forward slashes, drop any
        // leading separator (callers may pass "/auth/users.mdx"), and remove
        // the .md/.mdx extension so the URL has no "//" or "\" in it.
        const urlPath = relPath
          .replace(/\\/g, '/')
          .replace(/^\/+/, '')
          .replace(/\.mdx?$/, '')
        const href = `https://supabase.com/docs/guides/${urlPath}`

        return new GuideModel({
          title,
          href,
          checksum,
          content: processedContent,
          metadata,
          subsections,
        })
      },
      (error) => {
        if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
          // Include the guide path and the underlying message so logs show
          // which load failed and which file was missing.
          return new FileNotFoundError(
            `could not load guide ${relPath}: ${error.message}`,
            error
          )
        }
        return new Error(
          `Failed to load guide from ${relPath}: ${extractMessageFromAnyError(error)}`,
          {
            cause: error,
          }
        )
      }
    )
  }

  /**
   * Loads GuideModels from a list of file paths in parallel, collecting any
   * errors without stopping.
   *
   * @param filePaths - Paths of the guide files; each is made relative to
   *   `GUIDES_DIRECTORY` before loading
   * @param multiError - Shared error holder; every failed load is appended to
   *   `multiError.current` (created on first use)
   * @returns The guides that loaded successfully, in input order
   */
  private static async loadGuides(
    filePaths: Array<string>,
    multiError: { current: MultiError | null }
  ): Promise<Array<GuideModel>> {
    const relPaths = filePaths.map((filePath) => relative(GUIDES_DIRECTORY, filePath))
    const results = await Promise.all(relPaths.map((relPath) => this.fromFs(relPath)))

    const guides: Array<GuideModel> = []

    results.forEach((result, index) => {
      result.match(
        (guide) => guides.push(guide),
        (error) => {
          ;(multiError.current ??= new MultiError('Failed to load some guides:')).appendError(
            `Failed to load ${relPaths[index]}: ${extractMessageFromAnyError(error)}`,
            error
          )
        }
      )
    })

    return guides
  }

  /**
   * Loads all guide models from the filesystem by walking the content directory.
   *
   * This method recursively walks the guides directory (or a specific section
   * subdirectory) and loads all non-hidden .mdx files.
   *
   * If errors occur while loading individual files, they are collected but
   * don't prevent other files from loading.
   *
   * @param section - Optional section name to limit walking to a specific
   *                  subdirectory (e.g., "database", "auth")
   * @returns A Both containing [successful GuideModels, MultiError with all
   *          failures or null if no errors]
   *
   * @example
   * ```typescript
   * // Load all guides
   * const guides = (await GuideModelLoader.allFromFs()).unwrapLeft()
   *
   * // Load only database guides
   * const dbGuides = (await GuideModelLoader.allFromFs('database')).unwrapLeft()
   * ```
   */
  static async allFromFs(section?: string): Promise<Both<Array<GuideModel>, MultiError | null>> {
    const searchDir = section ? join(GUIDES_DIRECTORY, section) : GUIDES_DIRECTORY
    const multiError = { current: null as MultiError | null }

    // Get all .mdx files in the search directory
    const mdxFiles = await walkMdxFiles(searchDir, multiError)

    // Load each file and collect results
    const guides = await this.loadGuides(mdxFiles, multiError)

    return new Both(guides, multiError.current)
  }
}
|
||||
@@ -1,17 +1,12 @@
|
||||
import { createHash } from 'node:crypto'
|
||||
import { ObjectExpression } from 'estree'
|
||||
import GithubSlugger from 'github-slugger'
|
||||
import matter from 'gray-matter'
|
||||
import { type Content, type Root } from 'mdast'
|
||||
import { fromMarkdown } from 'mdast-util-from-markdown'
|
||||
import { toMarkdown } from 'mdast-util-to-markdown'
|
||||
import { mdxFromMarkdown, type MdxjsEsm } from 'mdast-util-mdx'
|
||||
import { mdxFromMarkdown, mdxToMarkdown } from 'mdast-util-mdx'
|
||||
import { toString } from 'mdast-util-to-string'
|
||||
import { mdxjs } from 'micromark-extension-mdxjs'
|
||||
import { u } from 'unist-builder'
|
||||
import { filter } from 'unist-util-filter'
|
||||
|
||||
type Json = Record<string, string | number | boolean | null | Json[] | { [key: string]: Json }>
|
||||
|
||||
type Section = {
|
||||
content: string
|
||||
@@ -21,17 +16,25 @@ type Section = {
|
||||
|
||||
export type ProcessedMdx = {
|
||||
checksum: string
|
||||
meta: Json
|
||||
meta: Record<string, unknown>
|
||||
sections: Section[]
|
||||
}
|
||||
|
||||
/**
 * Computes the SHA-256 digest of `content` with the Web Crypto API and
 * returns it as a lowercase hexadecimal string (two characters per byte).
 */
async function createHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content)
  const digest = await crypto.subtle.digest('SHA-256', bytes)

  let hex = ''
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, '0')
  }
  return hex
}
|
||||
|
||||
/**
|
||||
* Process MDX content.
|
||||
*
|
||||
* Splits MDX content into sections based on headings, and calculates checksum.
|
||||
*/
|
||||
function processMdx(content: string, options?: { yaml?: boolean }): ProcessedMdx {
|
||||
const checksum = createHash('sha256').update(content).digest('base64')
|
||||
async function processMdx(content: string, options?: { yaml?: boolean }): Promise<ProcessedMdx> {
|
||||
const checksum = await createHash(content)
|
||||
|
||||
let frontmatter: Record<string, unknown> = {}
|
||||
if (options?.yaml) {
|
||||
@@ -45,43 +48,15 @@ function processMdx(content: string, options?: { yaml?: boolean }): ProcessedMdx
|
||||
mdastExtensions: [mdxFromMarkdown()],
|
||||
})
|
||||
|
||||
let meta: Record<string, unknown> | undefined
|
||||
if (options?.yaml) {
|
||||
meta = frontmatter
|
||||
} else {
|
||||
meta = extractMetaExport(mdxTree)
|
||||
}
|
||||
|
||||
const serializableMeta: Json = meta && JSON.parse(JSON.stringify(meta))
|
||||
|
||||
// Remove all MDX elements from markdown
|
||||
const mdTree = filter(
|
||||
mdxTree,
|
||||
(node) =>
|
||||
![
|
||||
'mdxjsEsm',
|
||||
'mdxJsxFlowElement',
|
||||
'mdxJsxTextElement',
|
||||
'mdxFlowExpression',
|
||||
'mdxTextExpression',
|
||||
].includes(node.type)
|
||||
)
|
||||
|
||||
if (!mdTree) {
|
||||
return {
|
||||
checksum,
|
||||
meta: serializableMeta,
|
||||
sections: [],
|
||||
}
|
||||
}
|
||||
|
||||
const sectionTrees = splitTreeBy(mdTree, (node) => node.type === 'heading')
|
||||
const sectionTrees = splitTreeBy(mdxTree, (node) => node.type === 'heading')
|
||||
|
||||
const slugger = new GithubSlugger()
|
||||
|
||||
const sections = sectionTrees.map((tree) => {
|
||||
const [firstNode] = tree.children
|
||||
const content = toMarkdown(tree)
|
||||
const content = toMarkdown(tree, {
|
||||
extensions: [mdxToMarkdown()],
|
||||
})
|
||||
|
||||
const rawHeading: string | undefined =
|
||||
firstNode.type === 'heading' ? toString(firstNode) : undefined
|
||||
@@ -103,74 +78,11 @@ function processMdx(content: string, options?: { yaml?: boolean }): ProcessedMdx
|
||||
|
||||
return {
|
||||
checksum,
|
||||
meta: serializableMeta,
|
||||
sections,
|
||||
meta: frontmatter,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the `meta` ESM export from the MDX file.
|
||||
*
|
||||
* This info is akin to frontmatter.
|
||||
*/
|
||||
function extractMetaExport(mdxTree: Root) {
|
||||
const metaExportNode = mdxTree.children.find((node): node is MdxjsEsm => {
|
||||
return (
|
||||
node.type === 'mdxjsEsm' &&
|
||||
node.data?.estree?.body[0]?.type === 'ExportNamedDeclaration' &&
|
||||
node.data.estree.body[0].declaration?.type === 'VariableDeclaration' &&
|
||||
node.data.estree.body[0].declaration.declarations[0]?.id.type === 'Identifier' &&
|
||||
node.data.estree.body[0].declaration.declarations[0].id.name === 'meta'
|
||||
)
|
||||
})
|
||||
|
||||
if (!metaExportNode) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const objectExpression =
|
||||
(metaExportNode.data?.estree?.body[0]?.type === 'ExportNamedDeclaration' &&
|
||||
metaExportNode.data.estree.body[0].declaration?.type === 'VariableDeclaration' &&
|
||||
metaExportNode.data.estree.body[0].declaration.declarations[0]?.id.type === 'Identifier' &&
|
||||
metaExportNode.data.estree.body[0].declaration.declarations[0].id.name === 'meta' &&
|
||||
metaExportNode.data.estree.body[0].declaration.declarations[0].init?.type ===
|
||||
'ObjectExpression' &&
|
||||
metaExportNode.data.estree.body[0].declaration.declarations[0].init) ||
|
||||
undefined
|
||||
|
||||
if (!objectExpression) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
return getObjectFromExpression(objectExpression)
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts ES literals from an `estree` `ObjectExpression`
|
||||
* into a plain JavaScript object.
|
||||
*/
|
||||
function getObjectFromExpression(node: ObjectExpression) {
|
||||
return node.properties.reduce<
|
||||
Record<string, string | number | bigint | true | RegExp | undefined>
|
||||
>((object, property) => {
|
||||
if (property.type !== 'Property') {
|
||||
return object
|
||||
}
|
||||
|
||||
const key = (property.key.type === 'Identifier' && property.key.name) || undefined
|
||||
const value = (property.value.type === 'Literal' && property.value.value) || undefined
|
||||
|
||||
if (!key) {
|
||||
return object
|
||||
}
|
||||
|
||||
return {
|
||||
...object,
|
||||
[key]: value,
|
||||
}
|
||||
}, {})
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a `mdast` tree into multiple trees based on
|
||||
* a predicate function. Will include the splitting node
|
||||
@@ -210,4 +122,4 @@ function parseHeading(heading: string): { heading: string; customAnchor?: string
|
||||
}
|
||||
|
||||
export { processMdx }
|
||||
export type { Json, Section }
|
||||
export type { Section }
|
||||
|
||||
@@ -4,7 +4,7 @@ import { createClient } from '@supabase/supabase-js'
|
||||
import { parseArgs } from 'node:util'
|
||||
import { OpenAI } from 'openai'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
import type { Json, Section } from '../helpers.mdx.js'
|
||||
import type { Section } from '../helpers.mdx.js'
|
||||
import { fetchAllSources } from './sources/index.js'
|
||||
|
||||
const args = parseArgs({
|
||||
@@ -78,8 +78,8 @@ async function generateEmbeddings() {
|
||||
checksum: string
|
||||
sections: Section[]
|
||||
ragIgnore?: boolean
|
||||
meta?: Json
|
||||
} = embeddingSource.process()
|
||||
meta?: Record<string, unknown>
|
||||
} = await embeddingSource.process()
|
||||
|
||||
// Check for existing page in DB and compare checksums
|
||||
const { error: fetchPageError, data: existingPage } = await supabaseClient
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { Json, Section } from '../../helpers.mdx.js'
|
||||
import type { Section } from '../../helpers.mdx.js'
|
||||
|
||||
export abstract class BaseLoader {
|
||||
type: string
|
||||
@@ -14,7 +14,7 @@ export abstract class BaseLoader {
|
||||
export abstract class BaseSource {
|
||||
type: string
|
||||
checksum?: string
|
||||
meta?: Json
|
||||
meta?: Record<string, unknown>
|
||||
sections?: Section[]
|
||||
|
||||
constructor(
|
||||
@@ -22,7 +22,12 @@ export abstract class BaseSource {
|
||||
public path: string
|
||||
) {}
|
||||
|
||||
abstract process(): { checksum: string; meta?: Json; ragIgnore?: boolean; sections: Section[] }
|
||||
abstract process(): Promise<{
|
||||
checksum: string
|
||||
meta?: Record<string, unknown>
|
||||
ragIgnore?: boolean
|
||||
sections: Section[]
|
||||
}>
|
||||
|
||||
abstract extractIndexedContent(): string
|
||||
}
|
||||
|
||||
@@ -104,7 +104,7 @@ export class GitHubDiscussionSource extends BaseSource {
|
||||
super(source, path)
|
||||
}
|
||||
|
||||
process() {
|
||||
async process() {
|
||||
const { id, title, updatedAt, body, databaseId } = this.discussion
|
||||
|
||||
const checksum = createHash('sha256').update(updatedAt).digest('base64')
|
||||
|
||||
@@ -92,7 +92,7 @@ export class LintWarningsGuideSource extends BaseSource {
|
||||
super(source, path)
|
||||
}
|
||||
|
||||
process() {
|
||||
async process() {
|
||||
this.checksum = createHash('sha256').update(this.lint.content).digest('base64')
|
||||
|
||||
this.meta = {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { readFile } from 'fs/promises'
|
||||
import { processMdx } from '../../helpers.mdx.js'
|
||||
import { SubsectionModel } from '../../../resources/guide/guideModel.js'
|
||||
import { GuideModelLoader } from '../../../resources/guide/guideModelLoader.js'
|
||||
import { BaseLoader, BaseSource } from './base.js'
|
||||
|
||||
export class MarkdownLoader extends BaseLoader {
|
||||
@@ -15,8 +15,22 @@ export class MarkdownLoader extends BaseLoader {
|
||||
}
|
||||
|
||||
async load() {
|
||||
const contents = await readFile(this.filePath, 'utf8')
|
||||
return [new MarkdownSource(this.source, this.path, contents, this.options)]
|
||||
const guide = (
|
||||
await GuideModelLoader.fromFs(this.filePath.replace(/^content\/guides/, ''))
|
||||
).unwrap()
|
||||
return [
|
||||
new MarkdownSource(
|
||||
this.source,
|
||||
this.path,
|
||||
guide.content ?? '',
|
||||
{
|
||||
checksum: guide.checksum,
|
||||
meta: guide.metadata,
|
||||
sections: guide.subsections,
|
||||
},
|
||||
this.options
|
||||
),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,19 +41,29 @@ export class MarkdownSource extends BaseSource {
|
||||
source: string,
|
||||
path: string,
|
||||
public contents: string,
|
||||
{
|
||||
checksum,
|
||||
meta,
|
||||
sections,
|
||||
}: { checksum?: string; meta?: Record<string, unknown>; sections: Array<SubsectionModel> },
|
||||
public options?: { yaml?: boolean }
|
||||
) {
|
||||
super(source, path)
|
||||
this.checksum = checksum
|
||||
this.meta = meta ?? {}
|
||||
this.sections = sections.map((section) => ({
|
||||
content: section.content ?? '',
|
||||
heading: section.title,
|
||||
slug: section.href,
|
||||
}))
|
||||
}
|
||||
|
||||
process() {
|
||||
const { checksum, meta, sections } = processMdx(this.contents, this.options)
|
||||
|
||||
this.checksum = checksum
|
||||
this.meta = meta
|
||||
this.sections = sections
|
||||
|
||||
return { checksum, meta, sections }
|
||||
async process() {
|
||||
return {
|
||||
checksum: this.checksum ?? '',
|
||||
meta: this.meta,
|
||||
sections: this.sections ?? [],
|
||||
}
|
||||
}
|
||||
|
||||
extractIndexedContent(): string {
|
||||
|
||||
@@ -57,8 +57,8 @@ export class IntegrationSource extends BaseSource {
|
||||
super(source, path)
|
||||
}
|
||||
|
||||
process() {
|
||||
const { checksum, sections } = processMdx(this.partnerData.overview)
|
||||
async process() {
|
||||
const { checksum, sections } = await processMdx(this.partnerData.overview)
|
||||
const meta = {
|
||||
title: upperFirst(this.partnerData.slug),
|
||||
subtitle: 'Integration',
|
||||
|
||||
@@ -12,7 +12,6 @@ import { getApiEndpointById } from '../../../features/docs/Reference.generated.s
|
||||
import type { CliCommand, CliSpec } from '../../../generator/types/CliSpec.js'
|
||||
import { flattenSections } from '../../../lib/helpers.js'
|
||||
import { enrichedOperation, gen_v3 } from '../../../lib/refGenerator/helpers.js'
|
||||
import type { Json } from '../../helpers.mdx.js'
|
||||
import { BaseLoader, BaseSource } from './base.js'
|
||||
|
||||
export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
|
||||
@@ -24,7 +23,7 @@ export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
|
||||
constructor(
|
||||
source: string,
|
||||
path: string,
|
||||
public meta: Json,
|
||||
public meta: Record<string, unknown>,
|
||||
public specFilePath: string,
|
||||
public sectionsFilePath: string
|
||||
) {
|
||||
@@ -68,7 +67,7 @@ export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
|
||||
specSections: SpecSection[],
|
||||
id: string
|
||||
): SpecSection | undefined | Promise<SpecSection | undefined>
|
||||
enhanceMeta(_section: SpecSection): Json {
|
||||
enhanceMeta(_section: SpecSection): Record<string, unknown> {
|
||||
return this.meta
|
||||
}
|
||||
}
|
||||
@@ -81,12 +80,12 @@ export abstract class ReferenceSource<SpecSection> extends BaseSource {
|
||||
path: string,
|
||||
public refSection: ICommonSection,
|
||||
public specSection: SpecSection,
|
||||
public meta: Json
|
||||
public meta: Record<string, unknown>
|
||||
) {
|
||||
super(source, path)
|
||||
}
|
||||
|
||||
process() {
|
||||
async process() {
|
||||
const checksum = createHash('sha256')
|
||||
.update(JSON.stringify(this.refSection) + JSON.stringify(this.specSection))
|
||||
.digest('base64')
|
||||
@@ -125,7 +124,7 @@ export class OpenApiReferenceLoader extends ReferenceLoader<Partial<enrichedOper
|
||||
constructor(
|
||||
source: string,
|
||||
path: string,
|
||||
meta: Json,
|
||||
meta: Record<string, unknown>,
|
||||
specFilePath: string,
|
||||
sectionsFilePath: string
|
||||
) {
|
||||
@@ -251,7 +250,7 @@ export class ClientLibReferenceLoader extends ReferenceLoader<IFunctionDefinitio
|
||||
constructor(
|
||||
source: string,
|
||||
path: string,
|
||||
meta: Json,
|
||||
meta: Record<string, unknown>,
|
||||
specFilePath: string,
|
||||
sectionsFilePath: string
|
||||
) {
|
||||
@@ -272,7 +271,7 @@ export class ClientLibReferenceLoader extends ReferenceLoader<IFunctionDefinitio
|
||||
return functionDefinitions.find((functionDefinition) => functionDefinition.id === id)
|
||||
}
|
||||
|
||||
enhanceMeta(section: IFunctionDefinition): Json {
|
||||
enhanceMeta(section: IFunctionDefinition): Record<string, unknown> {
|
||||
return { ...this.meta, slug: section.id, methodName: section.title }
|
||||
}
|
||||
}
|
||||
@@ -311,7 +310,7 @@ export class CliReferenceLoader extends ReferenceLoader<CliCommand> {
|
||||
constructor(
|
||||
source: string,
|
||||
path: string,
|
||||
meta: Json,
|
||||
meta: Record<string, unknown>,
|
||||
specFilePath: string,
|
||||
sectionsFilePath: string
|
||||
) {
|
||||
|
||||
Generated
+3
-1
@@ -624,6 +624,9 @@ importers:
|
||||
'@types/unist':
|
||||
specifier: ^2.0.6
|
||||
version: 2.0.8
|
||||
'@types/uuid':
|
||||
specifier: ^10.0.0
|
||||
version: 10.0.0
|
||||
api-types:
|
||||
specifier: workspace:*
|
||||
version: link:../../packages/api-types
|
||||
@@ -10921,7 +10924,6 @@ packages:
|
||||
resolution: {integrity: sha512-t0q23FIpvHDTtnORW+bDJziGsal5uh9RJTJ1fyH8drd4lICOoXhJ5pLMUZ5C0VQei6dNmwTzzoTRgMkO9JgHEQ==}
|
||||
peerDependencies:
|
||||
eslint: '>= 5'
|
||||
bundledDependencies: []
|
||||
|
||||
eslint-plugin-import@2.31.0:
|
||||
resolution: {integrity: sha512-ixmkI62Rbc2/w8Vfxyh1jQRTdRTF52VxwRVHl/ykPAmqG+Nb7/kNn+byLP0LxPgI7zWA16Jt82SybJInmMia3A==}
|
||||
|
||||
Reference in New Issue
Block a user