mirror of
https://github.com/supabase/supabase.git
synced 2026-06-28 19:39:19 -04:00
65fab30935
Adding broad RLS policies to public buckets can cause users to expose more than they expected, like the ability to list all profile pictures on an app. This patches Assistant with knowledge to follow our latest guidance on restrictive RLS policies for storage buckets (see https://github.com/supabase/supabase/pull/46172). **Changes** - Adds Storage bucket evals for public website assets and avatar access patterns to distinguish public vs private bucket use cases - Adds eval for overly permissive table policies - Adds `storage` knowledge so Assistant distinguishes public buckets, private buckets, object reads, and object listing. - Adds `includeToolCallInputs` option for scorer transcripts so LLM judges can evaluate proposed SQL/tool actions. - Bumps max step count to 10 since storage knowledge may incur another tool call (also 10 is recommended [here](https://vercel.com/academy/ai-sdk/multi-step-and-generative-ui#why-multi-step-is-required) for complex multi-tool scenarios) **References** - https://supabase.com/docs/guides/storage/buckets/fundamentals#public-buckets - https://supabase.com/docs/guides/storage/security/access-control - https://github.com/supabase/supabase/pull/46172 **Notes:** - These prompt tweaks are not meant to be exhaustive fixes; they are mainly hotfixes intended to tide us over until these cases can be addressed more deeply in skills/docs and tracked in central evals Closes AI-676 Closes AI-756 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added Storage knowledge resource for the assistant covering Supabase Storage access patterns and RLS guidance. * Added three evaluation cases: two for Storage (marketing assets, avatars) and one for RLS policy generation for user profiles. * **Improvements** * Evaluators now include tool call inputs when judging conversations. * Assistant prompts and generation enhanced with richer Storage/RLS guidance and extended streaming limits. 
* **Tests** * Added test ensuring tool call inputs are included in serialized thread context. <!-- review_stack_entry_start --> [](https://app.coderabbit.ai/change-stack/supabase/supabase/pull/46168?utm_source=github_walkthrough&utm_medium=github&utm_campaign=change_stack) <!-- review_stack_entry_end --> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
184 lines
4.6 KiB
TypeScript
184 lines
4.6 KiB
TypeScript
import { describe, expect, it } from 'vitest'
|
|
|
|
import { getThreadPartsFromThread } from './trace-utils'
|
|
|
|
// Sanitized mock of the thread shape returned by trace.getThread().
//
// The fixture is an ordered list of chat messages. Each message has a `role`
// and a `content` that is either a plain string or an array of typed parts
// (`text`, `tool_call`, `tool_result`). Some assistant messages also carry an
// explicit `id: null`.
const MOCK_THREAD = [
  // System prompt placeholder.
  {
    role: 'system',
    content: 'System instructions omitted for fixture.',
  },
  // Assistant message carrying the project context sentence.
  {
    role: 'assistant',
    content: "The user's current project is Acme Analytics.",
  },
  // Earlier exchange: a user question and a text-part assistant answer.
  {
    role: 'user',
    content: 'What did we decide earlier?',
  },
  {
    role: 'assistant',
    content: [
      {
        type: 'text',
        text: 'We decided to add an orders table with RLS policies before generating sample data.',
      },
    ],
  },
  // Final user message in the thread.
  {
    role: 'user',
    content: 'Can you create that orders table now?',
  },
  // Assistant step with a single tool call (rename_chat).
  {
    role: 'assistant',
    id: null,
    content: [
      {
        type: 'tool_call',
        tool_name: 'rename_chat',
        tool_call_id: 'call_dummy_rename',
        arguments: {
          type: 'valid',
          value: {
            newName: 'Create Orders Table',
          },
        },
      },
    ],
  },
  // Tool result for rename_chat; output is an object.
  {
    role: 'tool',
    content: [
      {
        type: 'tool_result',
        tool_name: 'rename_chat',
        tool_call_id: 'call_dummy_rename',
        output: {
          status: 'Chat request sent to client',
        },
      },
    ],
  },
  // Assistant step with two tool calls in one message.
  {
    role: 'assistant',
    id: null,
    content: [
      {
        type: 'tool_call',
        tool_name: 'load_knowledge',
        tool_call_id: 'call_dummy_knowledge',
        arguments: {
          type: 'valid',
          value: {
            name: 'database',
          },
        },
      },
      {
        type: 'tool_call',
        tool_name: 'execute_sql',
        tool_call_id: 'call_dummy_sql',
        arguments: {
          type: 'valid',
          value: {
            sql: 'create table public.orders (id bigint generated by default as identity primary key);',
          },
        },
      },
    ],
  },
  // Matching tool results: one with a string output, one with a text object.
  {
    role: 'tool',
    content: [
      {
        type: 'tool_result',
        tool_name: 'load_knowledge',
        tool_call_id: 'call_dummy_knowledge',
        output: 'Knowledge fixture omitted.',
      },
      {
        type: 'tool_result',
        tool_name: 'execute_sql',
        tool_call_id: 'call_dummy_sql',
        output: {
          type: 'text',
          text: 'SQL executed successfully.',
        },
      },
    ],
  },
  // Closing assistant message with plain string content.
  {
    role: 'assistant',
    id: null,
    content:
      'I created the public.orders table. You should add RLS policies before exposing it to users.',
  },
]
|
|
|
|
// Tests for getThreadPartsFromThread: each case checks how a thread is split
// into projectContext, priorConversation, currentUserInput and
// lastAssistantTurn.
describe('getThreadPartsFromThread', () => {
  // Default options: tool calls are rendered as "[called <tool_name>]" only.
  it('parses a sanitized Braintrust trace.getThread payload', () => {
    expect(getThreadPartsFromThread(MOCK_THREAD)).toEqual({
      projectContext: "The user's current project is Acme Analytics.",
      priorConversation:
        '[user]\nWhat did we decide earlier?\n\n[assistant]\nWe decided to add an orders table with RLS policies before generating sample data.',
      currentUserInput: 'Can you create that orders table now?',
      lastAssistantTurn:
        '[assistant]\n[called rename_chat]\n\n[assistant]\n[called load_knowledge]\n[called execute_sql]\n\n[assistant]\nI created the public.orders table. You should add RLS policies before exposing it to users.',
    })
  })

  // With includeToolCallInputs, each "[called ...]" line is followed by the
  // tool call's arguments rendered as JSON.
  // NOTE(review): the JSON indentation inside the template literal below is
  // reconstructed as two spaces (pretty-printed JSON) - confirm against the
  // serializer in ./trace-utils.
  it('can include tool call inputs in serialized assistant turns', () => {
    expect(getThreadPartsFromThread(MOCK_THREAD, { includeToolCallInputs: true })).toMatchObject({
      // The trailing backslash suppresses the newline right after the opening
      // backtick, so the expected string starts at "[assistant]".
      lastAssistantTurn: `\
[assistant]
[called rename_chat]
{
  "newName": "Create Orders Table"
}

[assistant]
[called load_knowledge]
{
  "name": "database"
}
[called execute_sql]
{
  "sql": "create table public.orders (id bigint generated by default as identity primary key);"
}

[assistant]
I created the public.orders table. You should add RLS policies before exposing it to users.`,
    })
  })

  // An older project-context message is prepended; the later one from
  // MOCK_THREAD is the one expected in the result.
  it('uses the most recent project context message', () => {
    expect(
      getThreadPartsFromThread([
        {
          role: 'assistant',
          content: "The user's current project is Old Project.",
        },
        ...MOCK_THREAD,
      ])
    ).toMatchObject({
      projectContext: "The user's current project is Acme Analytics.",
    })
  })

  // With no user message at all, everything lands in priorConversation and
  // the current-turn fields are null.
  it('returns prior conversation without current turn parts when there is no user message', () => {
    expect(
      getThreadPartsFromThread([
        {
          role: 'assistant',
          content: 'I can help with your Supabase project.',
        },
      ])
    ).toEqual({
      projectContext: null,
      priorConversation: '[assistant]\nI can help with your Supabase project.',
      currentUserInput: null,
      lastAssistantTurn: null,
    })
  })
})
|