Files
supabase/apps/studio/lib/ai/model.utils.test.ts
Matt Rossman 0c5f64fcba feat(assistant): upgrade default models to gpt-5.4-nano and gpt-5.3-codex (#44107)
Replaces `gpt-5-mini` and `gpt-5` with `gpt-5.4-nano` and
`gpt-5.3-codex` respectively. Clients with stale model IDs in IndexedDB
will gracefully reset to the new defaults. While we can technically keep
the existing models around, we've
[opted](https://supabase.slack.com/archives/C051L8U2EJF/p1774283070517609?thread_ts=1773771991.871669&cid=C051L8U2EJF)
to replace them w/ the newer models for simplicity. Basic completion
endpoints use `'none'` reasoning level for optimal speed.

Rationale for these models is that they provide the best balance of
intelligence/speed and cost. GPT-5.4-nano is less expensive (0.8x
price), faster, and smarter than GPT-5-mini. GPT-5.4-mini would be even
smarter but is 3x the price. GPT-5.3-Codex is ~1.4x the price of GPT-5,
while GPT-5.4 would be 2x the price, but 5.3-Codex is still a big
intelligence boost from GPT-5.

See [eval
comparison](https://www.braintrust.dev/app/supabase.io/p/Assistant/experiments/mattrossman%2Fai-509-v2-upgrade-assistant-models-beyond-gpt-5-family-1774468619?c=master-1774458837&diff=between_experiments),
scores are relatively stable and conciseness naturally improves on
gpt-5.4-nano.

Other change:
- Fixed an eval test case to clarify that https://supabase.help is also
a correct URL for submitting a support ticket, which was unfairly scored
as incorrect
[here](https://www.braintrust.dev/app/supabase.io/p/Assistant/trace?object_type=experiment&object_id=5244cccd-23b2-4f79-9dd2-287f1b40ebad&r=bac9b903-8bde-4c21-99dd-e0ed141c4f9e&s=f248fbf5-75bf-4aab-be0a-87a4298e6d11)

I sanity checked the Assistant, natural language filters, and SQL Editor
completions on staging preview.

References:
- https://openai.com/index/introducing-gpt-5-4-mini-and-nano/
- https://openai.com/index/introducing-gpt-5-3-codex/
- https://developers.openai.com/api/docs/pricing

Closes AI-509
2026-03-26 14:35:54 +08:00

162 lines
6.0 KiB
TypeScript

import { describe, expect, it } from 'vitest'
import {
ASSISTANT_MODELS,
DEFAULT_ASSISTANT_ADVANCE_MODEL_ID,
DEFAULT_ASSISTANT_BASE_MODEL_ID,
DEFAULT_COMPLETION_MODEL,
defaultAssistantModelId,
getAssistantModelEntry,
getDefaultModelForProvider,
isAdvanceOnlyModelId,
isAssistantBaseModelId,
isKnownAssistantModelId,
openaiModelEntry,
PROVIDERS,
} from './model.utils'
import type { ProviderName } from './model.utils'
describe('model.utils', () => {
describe('getDefaultModelForProvider', () => {
it('should return correct default for bedrock provider', () => {
const result = getDefaultModelForProvider('bedrock')
expect(result).toBe('openai.gpt-oss-120b-1:0')
})
it('should return correct default for openai provider', () => {
const result = getDefaultModelForProvider('openai')
expect(result).toBe('gpt-5.4-nano')
})
it('should return undefined for unknown provider', () => {
const result = getDefaultModelForProvider('unknown' as ProviderName)
expect(result).toBeUndefined()
})
})
describe('PROVIDERS registry', () => {
it('should have bedrock provider with models', () => {
expect(PROVIDERS.bedrock).toBeDefined()
expect(PROVIDERS.bedrock.models).toBeDefined()
expect(Object.keys(PROVIDERS.bedrock.models)).toContain(
'anthropic.claude-3-7-sonnet-20250219-v1:0'
)
expect(Object.keys(PROVIDERS.bedrock.models)).toContain('openai.gpt-oss-120b-1:0')
})
it('should have openai provider with models', () => {
expect(PROVIDERS.openai).toBeDefined()
expect(PROVIDERS.openai.models).toBeDefined()
expect(Object.keys(PROVIDERS.openai.models)).toContain('gpt-5.3-codex')
expect(Object.keys(PROVIDERS.openai.models)).toContain('gpt-5.4-nano')
})
it('should have exactly one default model per provider', () => {
const providers: ProviderName[] = ['bedrock', 'openai']
providers.forEach((provider) => {
const models = PROVIDERS[provider].models
const defaultModels = Object.entries(models).filter(([_, config]) => config.default)
expect(defaultModels.length).toBe(1)
})
})
it('should have valid model configurations', () => {
const providers: ProviderName[] = ['bedrock', 'openai']
providers.forEach((provider) => {
const models = PROVIDERS[provider].models
Object.entries(models).forEach(([_modelId, config]) => {
expect(config).toHaveProperty('default')
expect(typeof config.default).toBe('boolean')
})
})
})
it('should have bedrock model with promptProviderOptions', () => {
const sonnetModel = PROVIDERS.bedrock.models['anthropic.claude-3-7-sonnet-20250219-v1:0']
expect(sonnetModel.promptProviderOptions).toBeDefined()
expect(sonnetModel.promptProviderOptions?.bedrock).toBeDefined()
expect(sonnetModel.promptProviderOptions?.bedrock?.cachePoint).toEqual({
type: 'default',
})
})
it('should have openai provider with providerOptions', () => {
expect(PROVIDERS.openai.providerOptions).toBeDefined()
expect(PROVIDERS.openai.providerOptions?.openai).toBeDefined()
expect(PROVIDERS.openai.providerOptions?.openai?.reasoningEffort).toBeUndefined()
})
})
describe('assistant model registry', () => {
it('should have non-empty base and advance tiers', () => {
expect(
ASSISTANT_MODELS.filter((m) => !m.requiresAdvanceModelEntitlement).length
).toBeGreaterThan(0)
expect(
ASSISTANT_MODELS.filter((m) => m.requiresAdvanceModelEntitlement).length
).toBeGreaterThan(0)
})
it('all model IDs should be unique', () => {
const ids = ASSISTANT_MODELS.map((m) => m.id)
expect(new Set(ids).size).toBe(ids.length)
})
it('should have all models in openai provider registry', () => {
ASSISTANT_MODELS.forEach((entry) => {
expect(Object.keys(PROVIDERS.openai.models)).toContain(entry.id)
})
})
it('defaults should satisfy unions', () => {
expect(DEFAULT_ASSISTANT_BASE_MODEL_ID).toBe('gpt-5.4-nano')
expect(DEFAULT_ASSISTANT_ADVANCE_MODEL_ID).toBe('gpt-5.3-codex')
expect(defaultAssistantModelId(false)).toBe(DEFAULT_ASSISTANT_BASE_MODEL_ID)
expect(defaultAssistantModelId(true)).toBe(DEFAULT_ASSISTANT_ADVANCE_MODEL_ID)
})
it('isAssistantBaseModelId / isAdvanceOnlyModelId', () => {
expect(isAssistantBaseModelId('gpt-5.4-nano')).toBe(true)
expect(isAssistantBaseModelId('gpt-5.3-codex')).toBe(false)
expect(isAdvanceOnlyModelId('gpt-5.3-codex')).toBe(true)
expect(isAdvanceOnlyModelId('gpt-5.4-nano')).toBe(false)
})
it('isKnownAssistantModelId', () => {
expect(isKnownAssistantModelId('gpt-5.4-nano')).toBe(true)
expect(isKnownAssistantModelId('gpt-5.3-codex')).toBe(true)
expect(isKnownAssistantModelId('gpt-5')).toBe(false)
expect(isKnownAssistantModelId('gpt-5-mini')).toBe(false)
expect(isKnownAssistantModelId('unknown')).toBe(false)
})
it('getAssistantModelEntry returns config for known ids', () => {
expect(getAssistantModelEntry('gpt-5.4-nano').reasoningEffort).toBe('low')
expect(getAssistantModelEntry('gpt-5.3-codex').reasoningEffort).toBe('low')
expect(getAssistantModelEntry('gpt-5.4-nano')).toEqual(
ASSISTANT_MODELS.find((m) => m.id === 'gpt-5.4-nano')
)
})
it('DEFAULT_COMPLETION_MODEL is gpt-5.4-nano with no reasoning effort', () => {
expect(DEFAULT_COMPLETION_MODEL.id).toBe(DEFAULT_ASSISTANT_BASE_MODEL_ID)
expect(DEFAULT_COMPLETION_MODEL.reasoningEffort).toBe('none')
})
it('openaiModelEntry enforces valid reasoning effort at compile time', () => {
// Valid: supported effort level
const withEffort = openaiModelEntry({
id: 'gpt-5.4-nano',
reasoningEffort: 'low',
})
expect(withEffort.reasoningEffort).toBe('low')
// Valid: no effort
const withoutEffort = openaiModelEntry({ id: 'gpt-5.4-nano' })
expect(withoutEffort.reasoningEffort).toBeUndefined()
})
})
})