Strip UTF-8-encoded C1 control characters from rendered items

The display sanitizer already stripped raw 8-bit C1 bytes (0x80-0x9F), because a standalone byte in that range is invalid UTF-8 and decodes to RuneError. The valid two-byte UTF-8 encodings of the C1 code points U+0080-U+009F (0xC2 0x80 .. 0xC2 0x9F), however, decode to the actual control characters and were passed through, allowing a filename or input line containing CSI (U+009B), OSC (U+009D), or DCS (U+0090) to inject terminal control sequences when rendered.
2026-05-06 05:26:45 -04:00 · 2026-05-05 09:56:59 +09:00
parent b4a86a9c8a
commit 263eb4732f
1 changed files with 2 additions and 1 deletions
@@ -67,7 +67,8 @@ func (r *LightRenderer) stderrInternal(str string, allowNLCR bool, resetCode str
 	for len(bytes) > 0 {
 		r, sz := utf8.DecodeRune(bytes)
 		nlcr := r == '\n' || r == '\r'
-		if r >= 32 || r == '\x1b' || nlcr {
+		isC1 := r >= 0x80 && r <= 0x9F
+		if (r >= 32 && !isC1) || r == '\x1b' || nlcr {
 			if nlcr && !allowNLCR {
 				if r == '\r' {
 					runes = append(runes, []rune(CR+resetCode)...)