Files
supabase/apps/studio/lib/sql-event-parser.ts
T
Alaister Young 2555e81dde fix(studio): don't scan dollar-quoted bodies for DDL in SQL editor (#45050)
Fixes a false positive in the CREATE-TABLE-without-RLS warning modal
added in #45008. The warning was firing on `CREATE FUNCTION` statements
because the `SELECT..INTO` detector was matching plpgsql variable
assignments inside `$$…$$` function bodies.

Reported example that triggered the modal with no table actually being
created:

```sql
create or replace function schema_checks()
returns jsonb
language plpgsql
as $$
declare
  ret jsonb;
begin
  select jsonb_build_object('value', 'ok') into ret;
  return ret;
end;
$$;
```

**Changed:**
- `SQLEventParser.match()` now strips the body of `$tag$…$tag$` blocks
before running detectors. Tags are kept as markers; content is blanked
out so function bodies, DO blocks, and dollar-quoted string literals are
never scanned as DDL.
- Updated a pre-existing parser test that asserted the buggy behaviour
(it expected `CREATE TABLE fake` inside a `$$…$$` string literal to be
detected — `$$…$$` is a string literal in Postgres, not DDL).

**Added:**
- Regression tests in `SQLEditor.utils.test.ts` covering: the exact
reported function, DO blocks with `select into`, `create table` text
inside a function body, mixed top-level `CREATE TABLE` + function with
`INTO` assignments, and custom `$body$…$body$` tags.
- Parser-level regression test in `sql-event-parser.test.ts`.

## To test

- In the SQL editor, paste the function from the Slack report and run it
— the RLS warning modal should not appear.
- Run `create table foo (id int8 primary key);` on its own — modal still
appears as before.
- Run `create table foo (id int8); create or replace function bar()
returns int language plpgsql as $$ declare v int; begin select 1 into v;
return v; end; $$;` — modal should flag only `foo`, not `v`.
- Run an existing destructive query (`drop table x`) — unaffected, modal
still works.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Bug Fixes**
* Parser no longer treats DDL/DML-like text inside PL/pgSQL functions,
DO blocks, or dollar-quoted bodies (including nested/custom tags) as
top-level CREATE TABLE/SELECT INTO, preventing false detections and UI
warnings.

* **Tests**
* Added unit and e2e regression tests covering dollar-quoted blocks,
nested dollar tags, DO blocks, SELECT INTO inside functions, and
positive controls with a real top-level CREATE TABLE.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Alaister Young <10985857+alaister@users.noreply.github.com>
2026-04-20 21:19:28 +08:00

149 lines
5.2 KiB
TypeScript

/**
* Lightweight SQL parser for telemetry event detection.
*
* [Sean] Replace this with a proper SQL parser like `@supabase/pg-parser` once a
* browser-compatible version is available.
*/
import { TABLE_EVENT_ACTIONS, TableEventAction } from 'common/telemetry-constants'
/**
 * One table-related event detected in a SQL statement.
 */
export interface TableEventDetails {
/** Which kind of table event the statement was recognised as. */
type: TableEventAction
/** Schema qualifier written in the statement, if any; `SQLEventParser` strips quote characters and the trailing dot. */
schema?: string
/** Table name captured from the statement; `SQLEventParser` strips quote characters from it. */
tableName?: string
}
/** Associates an event type with the regular expressions that signal it. */
type Detector = {
// Event type reported when one of `patterns` matches a statement.
type: TableEventAction
// Tried in array order; the matcher reads the named capture groups `schema`
// (optional) and `table` from the first pattern that matches.
patterns: RegExp[]
}
/**
 * Regex-based detector for table events (table created, data added, RLS
 * enabled) in a SQL script.
 *
 * This is a heuristic, not a SQL parser. The script is pre-processed (bodies of
 * dollar-quoted blocks blanked, comments removed), split into statements on
 * top-level semicolons, and each statement is tested against `DETECTORS`.
 * At most one event is reported per statement.
 */
export class SQLEventParser {
  /**
   * Detectors are evaluated in array order and, within a detector, patterns are
   * tried in order; the first match wins. Every pattern exposes an optional
   * `schema` group (including its trailing dot) and a `table` group.
   */
  private static DETECTORS: Detector[] = [
    {
      type: TABLE_EVENT_ACTIONS.TableCreated,
      patterns: [
        /CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))/i,
        /CREATE\s+TEMP(?:ORARY)?\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))/i,
        /CREATE\s+UNLOGGED\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))/i,
        /SELECT\s+.*?\s+INTO\s+(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))/is,
        /CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))\s+AS\s+SELECT/i,
      ],
    },
    {
      type: TABLE_EVENT_ACTIONS.TableDataAdded,
      patterns: [
        /INSERT\s+INTO\s+(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))/i,
        /COPY\s+(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+))\s+FROM/i,
      ],
    },
    {
      type: TABLE_EVENT_ACTIONS.TableRLSEnabled,
      patterns: [
        // The `s` (dotAll) flag lets `.*?` cross line breaks, so the usual
        // two-line form "ALTER TABLE foo\n  ENABLE ROW LEVEL SECURITY" matches.
        // The optional IF EXISTS / ONLY prefixes are skipped so they are not
        // mistaken for the table name.
        /ALTER\s+TABLE\s+(?:IF\s+EXISTS\s+)?(?:ONLY\s+)?(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+)).*?ENABLE\s+ROW\s+LEVEL\s+SECURITY/is,
        /ALTER\s+TABLE\s+(?:IF\s+EXISTS\s+)?(?:ONLY\s+)?(?<schema>(?:"[^"]+"|[\w]+)\.)?(?<table>(?:"(?:[^"]|"")+"|`(?:[^`]|``)+`|[\w]+)).*?ENABLE\s+RLS/is,
      ],
    },
  ]

  /**
   * Lexical constructs whose contents must be treated as opaque while
   * pre-processing: single-quoted strings, double-quoted identifiers, line
   * comments, block comments and dollar-quoted blocks.
   *
   * Scanning them with a single alternation means whichever construct starts
   * first in the text wins, so a `$$` inside a comment or string cannot open a
   * dollar-quoted block, and `--` inside a string or function body is not
   * taken for a comment. Capture group 1 is the dollar-quote tag (e.g. `$$` or
   * `$body$`); the backreference forces the closing tag to equal the opening
   * one. Nested block comments are not recognised.
   */
  private static readonly LEXICAL_TOKENS =
    /'(?:[^']|'')*'|"(?:[^"]|"")*"|--.*|\/\*[\s\S]*?\*\/|(\$[a-zA-Z0-9_]*\$)[\s\S]*?\1/g

  /**
   * Tokens used for statement splitting: quoted strings, quoted identifiers,
   * dollar-quoted blocks (matching tags enforced by the backreference),
   * semicolons, and runs of any other characters.
   */
  private static readonly STATEMENT_TOKENS =
    /'(?:[^']|'')*'|"(?:[^"]|"")*"|(\$[a-zA-Z0-9_]*\$)[\s\S]*?\1|;|[^'"$;]+/g

  /**
   * Strips quote characters (`"`, `` ` ``, `'`) and a trailing dot from a
   * captured identifier.
   *
   * @param identifier Raw capture-group text; may be `undefined` when the group
   *   did not participate in the match.
   * @returns The cleaned identifier, or `undefined` if none was given.
   */
  private cleanIdentifier(identifier?: string): string | undefined {
    return identifier?.replace(/["`']/g, '').replace(/\.$/, '')
  }

  /**
   * Blanks out the body of `$tag$...$tag$` blocks (PL/pgSQL function bodies, DO
   * blocks, dollar-quoted string literals) so their contents aren't scanned for
   * DDL. A `select ... into var` inside a function body is variable assignment,
   * not table creation, and would otherwise trip the SELECT..INTO detector.
   *
   * Opening and closing tags must match, so a nested inner block with a
   * different tag (e.g. `$fn$` containing `$sql$...$sql$`) is consumed as part
   * of the outer body. Dollar signs that appear inside comments, string
   * literals or quoted identifiers are left alone: those constructs are
   * matched as whole tokens and returned unchanged.
   *
   * @param sql Raw SQL text.
   * @returns The SQL with every dollar-quoted block reduced to its two tags.
   */
  private stripDollarQuoteBodies(sql: string): string {
    return sql.replace(SQLEventParser.LEXICAL_TOKENS, (token: string, tag: string | undefined) =>
      // `tag` is only defined when the dollar-quote alternative matched.
      tag ? tag + tag : token
    )
  }

  /**
   * Finds the first detector pattern that matches a single statement.
   *
   * @param sql One SQL statement (already pre-processed).
   * @returns The detected event, or `null` when no pattern matches.
   */
  private match(sql: string): TableEventDetails | null {
    for (const { type, patterns } of SQLEventParser.DETECTORS) {
      for (const pattern of patterns) {
        const groups = sql.match(pattern)?.groups
        if (groups) {
          return {
            type,
            schema: this.cleanIdentifier(groups.schema),
            tableName: this.cleanIdentifier(groups.table ?? groups.object),
          }
        }
      }
    }
    return null
  }

  /**
   * Splits a script into statements on semicolons that are not inside quoted
   * strings, quoted identifiers or dollar-quoted blocks.
   *
   * @param sql SQL text with comments already removed.
   * @returns Trimmed, non-empty statements in source order.
   */
  private splitStatements(sql: string): string[] {
    const tokens = sql.match(SQLEventParser.STATEMENT_TOKENS) ?? []
    const statements: string[] = []
    let current = ''

    const flush = () => {
      const trimmed = current.trim()
      if (trimmed) statements.push(trimmed)
      current = ''
    }

    for (const token of tokens) {
      if (token === ';') {
        flush()
      } else {
        current += token
      }
    }
    // The final statement may not be terminated by a semicolon.
    flush()

    return statements
  }

  /**
   * Removes duplicate events, keeping the first occurrence of each
   * (type, schema, tableName) combination.
   *
   * @param events Events in detection order.
   * @returns The events with later duplicates dropped.
   */
  private deduplicate(events: TableEventDetails[]): TableEventDetails[] {
    const seen = new Set<string>()
    return events.filter((e) => {
      const key = `${e.type}:${e.schema || ''}:${e.tableName || ''}`
      if (seen.has(key)) return false
      seen.add(key)
      return true
    })
  }

  /**
   * Removes `--` line comments and block comments.
   *
   * Comment markers that occur inside string literals, quoted identifiers or
   * dollar-quoted blocks are part of those tokens and are preserved.
   *
   * @param sql SQL text.
   * @returns The SQL with comments deleted.
   */
  private removeComments(sql: string): string {
    return sql.replace(SQLEventParser.LEXICAL_TOKENS, (token: string) =>
      token.startsWith('--') || token.startsWith('/*') ? '' : token
    )
  }

  /**
   * Detects table events in a SQL script.
   *
   * @param sql One or more SQL statements.
   * @returns De-duplicated events in the order their statements appear; at most
   *   one event per statement.
   */
  getTableEvents(sql: string): TableEventDetails[] {
    // Dollar-quote bodies are blanked first so that nothing inside a function
    // body (semicolons, comment-like text, DDL-like text) reaches the later
    // steps; comments are then dropped before splitting so a semicolon inside
    // a comment cannot split a statement.
    const statements = this.splitStatements(this.removeComments(this.stripDollarQuoteBodies(sql)))
    const results: TableEventDetails[] = []
    for (const stmt of statements) {
      const event = this.match(stmt)
      if (event) results.push(event)
    }
    return this.deduplicate(results)
  }
}
/** Shared, ready-to-use `SQLEventParser` instance exported by this module. */
export const sqlEventParser = new SQLEventParser()