Files
supabase/apps/docs/scripts/search/sources/github-discussion.ts
Charis e46ab9c1a2 refactor: reading markdown docs files (#37774)
* refactor: reading markdown docs files

Refactor how Markdown docs files are read:
- Reuses the same logic across search index generation & page generation
- Improves the indexed content for search:
  - Stops removing MDX components, which often contain useful
    information like Admonitions
  - Denormalizes Partials and CodeSamples for more complete content

This is a prerequisite step for implementing the "Copy docs as Markdown"
functionality.

Only touches regular guides for now, not federated ones.

* fix: tailwind build error (#37728)

We switched to ESM imports by default a while ago, which means local
builds are now breaking because the Tailwind config uses a `require`.
Changed the Tailwind config file to CJS. (I have no idea how this has been
working on Vercel all this time.)

* style: prettier
2025-08-13 11:37:14 -04:00

148 lines
3.6 KiB
TypeScript

import { createAppAuth } from '@octokit/auth-app'
import { Octokit } from '@octokit/core'
import { paginateGraphql } from '@octokit/plugin-paginate-graphql'
import crypto, { createHash } from 'node:crypto'
import { BaseLoader, BaseSource } from './base.js'
/**
 * Octokit class with the `paginateGraphql` plugin applied.
 * `fetchDiscussions` calls `graphql.paginate` on an instance of this class.
 */
export const ExtendedOctokit = Octokit.plugin(paginateGraphql)
/** Instance type of the plugin-extended `ExtendedOctokit` class. */
export type ExtendedOctokit = InstanceType<typeof ExtendedOctokit>
/**
 * A single discussion node, holding exactly the fields selected by the
 * GraphQL query in `fetchDiscussions`.
 */
export type Discussion = {
// Copied into the `meta` object built by `GitHubDiscussionSource.process`
id: string
// Hashed (SHA-256, base64) to produce the source checksum
updatedAt: string
// Passed as the second argument to the `BaseLoader` constructor
url: string
// Used as the section heading and as the `# ` heading line of the content
title: string
// Placed after the title heading line to form the section content
body: string
// Used to build the section slug (`discussion-<databaseId>`)
databaseId: number
}
/**
 * Shape of the GraphQL result that `fetchDiscussions` destructures.
 * The query also selects `pageInfo { hasNextPage endCursor }`, which is not
 * modelled here.
 */
export type DiscussionsResponse = {
repository: {
discussions: {
totalCount: number
nodes: Discussion[]
}
}
}
// GitHub App credentials, read once from the environment at module load.
// Each value is `undefined` when its variable is not set.
// `fetchDiscussions` passes them as the `auth` options of the Octokit client.
const appId = process.env.DOCS_GITHUB_APP_ID
const installationId = process.env.DOCS_GITHUB_APP_INSTALLATION_ID
// Parsed with `crypto.createPrivateKey` before use in `fetchDiscussions`
const privateKey = process.env.DOCS_GITHUB_APP_PRIVATE_KEY
/**
 * Fetches GitHub discussions for a repository + category.
 *
 * Authenticates as a GitHub App using the module-level `appId`,
 * `installationId` and `privateKey` values (read from the
 * `DOCS_GITHUB_APP_ID`, `DOCS_GITHUB_APP_INSTALLATION_ID` and
 * `DOCS_GITHUB_APP_PRIVATE_KEY` environment variables), then runs a paginated
 * GraphQL query that requests 100 discussions per page.
 *
 * @param owner - Repository owner, sent as the `$owner` query variable
 * @param repo - Repository name, sent as the `$repo` query variable
 * @param categoryId - Discussion category ID, sent as the `$categoryId` query variable
 * @returns The `nodes` of the paginated `discussions` result
 * @throws Error when any of the three GitHub App environment variables is
 *   unset or empty
 */
export async function fetchDiscussions(owner: string, repo: string, categoryId: string) {
  // Fail fast with an actionable message instead of letting
  // `crypto.createPrivateKey(undefined)` or the auth strategy raise an opaque
  // error later. The check also narrows the three values to `string`.
  if (!appId || !installationId || !privateKey) {
    const missing = [
      !appId && 'DOCS_GITHUB_APP_ID',
      !installationId && 'DOCS_GITHUB_APP_INSTALLATION_ID',
      !privateKey && 'DOCS_GITHUB_APP_PRIVATE_KEY',
    ].filter(Boolean)
    throw new Error(
      `Missing required environment variable(s) for GitHub App authentication: ${missing.join(', ')}`
    )
  }

  const octokit = new ExtendedOctokit({
    authStrategy: createAppAuth,
    auth: {
      appId,
      installationId,
      // Parse the configured key and re-export it as a PKCS#8 PEM before
      // handing it to the auth strategy (presumably because the configured
      // key may be in a different format -- confirm against deployment config).
      privateKey: crypto.createPrivateKey(privateKey).export({ type: 'pkcs8', format: 'pem' }),
    },
  })

  // The query text below is kept verbatim; `$cursor` is declared for the
  // pagination helper and `pageInfo` is selected alongside the nodes.
  const {
    repository: {
      discussions: { nodes: discussions },
    },
  } = await octokit.graphql.paginate<DiscussionsResponse>(
    `
query troubleshootDiscussions($cursor: String, $owner: String!, $repo: String!, $categoryId: ID!) {
repository(owner: $owner, name: $repo) {
discussions(first: 100, after: $cursor, categoryId: $categoryId) {
totalCount
nodes {
id
updatedAt
url
title
body
databaseId
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
`,
    {
      owner,
      repo,
      categoryId,
    }
  )

  return discussions
}
/**
 * Loader for a single GitHub discussion.
 *
 * The discussion's `url` is forwarded to the `BaseLoader` constructor as its
 * second argument, and the discussion itself is kept on the instance.
 */
export class GitHubDiscussionLoader extends BaseLoader {
  type = 'github-discussions' as const

  constructor(
    source: string,
    public discussion: Discussion
  ) {
    super(source, discussion.url)
  }

  /**
   * Wraps the held discussion in a one-element array containing a
   * `GitHubDiscussionSource` built from this loader's `source`, `path` and
   * `discussion`.
   */
  async load() {
    const { source, path, discussion } = this
    const discussionSource = new GitHubDiscussionSource(source, path, discussion)
    return [discussionSource]
  }
}
/**
 * Search source backed by a single GitHub discussion.
 *
 * `process` derives a checksum, metadata and one content section from the
 * discussion; `extractIndexedContent` flattens the stored sections to text.
 */
export class GitHubDiscussionSource extends BaseSource {
  type = 'github-discussions' as const

  constructor(
    source: string,
    path: string,
    public discussion: Discussion
  ) {
    super(source, path)
  }

  /**
   * Builds the checksum, meta and sections for the discussion, stores them on
   * the instance and returns them.
   *
   * @returns An object with `checksum`, `meta` and `sections`
   */
  async process() {
    const post = this.discussion

    // The checksum is derived from `updatedAt` only.
    const hasher = createHash('sha256')
    hasher.update(post.updatedAt)
    const checksum = hasher.digest('base64')

    const meta = { id: post.id, title: post.title, updatedAt: post.updatedAt }

    // Only one section is produced per discussion for now; more could be added
    // if comments/answers should be included.
    const sections = [
      {
        heading: post.title,
        // The discussion post itself is currently treated as the answer (rather
        // than a comment marked as answer), so the slug targets the initial post.
        slug: `discussion-${post.databaseId}`,
        // Title + body rendered as markdown for better embeddings + LLM response.
        content: `# ${post.title}\n${post.body}`,
      },
    ]

    this.checksum = checksum
    this.meta = meta
    this.sections = sections

    return { checksum, meta, sections }
  }

  /**
   * Joins every stored section as `heading`, a blank line, then `content`,
   * with sections separated by a newline. Returns an empty string when no
   * sections are set.
   */
  extractIndexedContent(): string {
    const chunks: string[] = []
    for (const { heading, content } of this.sections ?? []) {
      chunks.push(`${heading}\n\n${content}`)
    }
    return chunks.join('\n')
  }
}