// supabase/apps/studio/evals/trace-utils.test.ts

import { describe, expect, it } from 'vitest'

import { getThreadPartsFromThread } from './trace-utils'

// Builds a `tool_call` content part whose arguments are wrapped as `{ type: 'valid', value }`.
const toolCallPart = (toolName: string, toolCallId: string, args: Record<string, string>) => ({
  type: 'tool_call',
  tool_name: toolName,
  tool_call_id: toolCallId,
  arguments: { type: 'valid', value: args },
})

// Builds a `tool_result` content part; `output` is stored as given (string or object).
const toolResultPart = (toolName: string, toolCallId: string, output: unknown) => ({
  type: 'tool_result',
  tool_name: toolName,
  tool_call_id: toolCallId,
  output,
})

// Builds an assistant message that carries an explicit `id: null` field.
const assistantWithNullId = <T>(content: T) => ({ role: 'assistant', id: null, content })

// Sanitized mock of the thread shape returned by trace.getThread().
// Each tool_result below reuses the tool_call_id of the tool_call it answers.
const MOCK_THREAD = [
  { role: 'system', content: 'System instructions omitted for fixture.' },
  { role: 'assistant', content: "The user's current project is Acme Analytics." },

  // Earlier exchange.
  { role: 'user', content: 'What did we decide earlier?' },
  {
    role: 'assistant',
    content: [
      {
        type: 'text',
        text: 'We decided to add an orders table with RLS policies before generating sample data.',
      },
    ],
  },

  // Final user message, followed by assistant tool calls, tool results, and a text reply.
  { role: 'user', content: 'Can you create that orders table now?' },
  assistantWithNullId([
    toolCallPart('rename_chat', 'call_dummy_rename', { newName: 'Create Orders Table' }),
  ]),
  {
    role: 'tool',
    content: [
      toolResultPart('rename_chat', 'call_dummy_rename', {
        status: 'Chat request sent to client',
      }),
    ],
  },
  assistantWithNullId([
    toolCallPart('load_knowledge', 'call_dummy_knowledge', { name: 'database' }),
    toolCallPart('execute_sql', 'call_dummy_sql', {
      sql: 'create table public.orders (id bigint generated by default as identity primary key);',
    }),
  ]),
  {
    role: 'tool',
    content: [
      toolResultPart('load_knowledge', 'call_dummy_knowledge', 'Knowledge fixture omitted.'),
      toolResultPart('execute_sql', 'call_dummy_sql', {
        type: 'text',
        text: 'SQL executed successfully.',
      }),
    ],
  },
  assistantWithNullId(
    'I created the public.orders table. You should add RLS policies before exposing it to users.'
  ),
]

// Exercises getThreadPartsFromThread against the sanitized fixture and a few small variations.
describe('getThreadPartsFromThread', () => {
  it('parses a sanitized Braintrust trace.getThread payload', () => {
    const parts = getThreadPartsFromThread(MOCK_THREAD)

    // With default options, each tool call is expected as a bare `[called <tool>]` marker,
    // and the tool-role messages are not expected to appear in the serialized text.
    expect(parts).toEqual({
      projectContext: "The user's current project is Acme Analytics.",
      priorConversation:
        '[user]\nWhat did we decide earlier?\n\n[assistant]\nWe decided to add an orders table with RLS policies before generating sample data.',
      currentUserInput: 'Can you create that orders table now?',
      lastAssistantTurn:
        '[assistant]\n[called rename_chat]\n\n[assistant]\n[called load_knowledge]\n[called execute_sql]\n\n[assistant]\nI created the public.orders table. You should add RLS policies before exposing it to users.',
    })
  })

  it('can include tool call inputs in serialized assistant turns', () => {
    const parts = getThreadPartsFromThread(MOCK_THREAD, { includeToolCallInputs: true })

    // Only lastAssistantTurn is checked here: each tool call marker should be followed by its
    // arguments pretty-printed as JSON with two-space indentation.
    expect(parts).toMatchObject({
      lastAssistantTurn: `\
[assistant]
[called rename_chat]
{
  "newName": "Create Orders Table"
}

[assistant]
[called load_knowledge]
{
  "name": "database"
}
[called execute_sql]
{
  "sql": "create table public.orders (id bigint generated by default as identity primary key);"
}

[assistant]
I created the public.orders table. You should add RLS policies before exposing it to users.`,
    })
  })

  it('uses the most recent project context message', () => {
    // An older project-context message is placed ahead of the fixture's own one.
    const staleContextMessage = {
      role: 'assistant',
      content: "The user's current project is Old Project.",
    }
    const threadWithStaleContext = [staleContextMessage, ...MOCK_THREAD]

    const parts = getThreadPartsFromThread(threadWithStaleContext)

    expect(parts).toMatchObject({
      projectContext: "The user's current project is Acme Analytics.",
    })
  })

  it('returns prior conversation without current turn parts when there is no user message', () => {
    // A thread that consists of a single assistant message and no user message at all.
    const assistantOnlyThread = [
      {
        role: 'assistant',
        content: 'I can help with your Supabase project.',
      },
    ]

    const parts = getThreadPartsFromThread(assistantOnlyThread)

    expect(parts).toEqual({
      projectContext: null,
      priorConversation: '[assistant]\nI can help with your Supabase project.',
      currentUserInput: null,
      lastAssistantTurn: null,
    })
  })
})