Strip UTF-8-encoded C1 control characters from rendered items

The display sanitizer already stripped raw 8-bit C1 bytes (0x80-0x9F)
because they decode to RuneError as standalone bytes. Their valid UTF-8
encodings (0xC2 0x80 .. 0xC2 0x9F), however, decode to the actual C1 code
points (U+0080 .. U+009F) and were passed through, allowing a filename or
input line containing CSI (U+009B), OSC (U+009D), or DCS (U+0090) to
inject terminal control sequences when rendered.
This commit is contained in:
Junegunn Choi
2026-05-05 09:56:59 +09:00
parent b4a86a9c8a
commit 263eb4732f
+2 -1
View File
@@ -67,7 +67,8 @@ func (r *LightRenderer) stderrInternal(str string, allowNLCR bool, resetCode str
for len(bytes) > 0 {
r, sz := utf8.DecodeRune(bytes)
nlcr := r == '\n' || r == '\r'
-if r >= 32 || r == '\x1b' || nlcr {
+isC1 := r >= 0x80 && r <= 0x9F
+if (r >= 32 && !isC1) || r == '\x1b' || nlcr {
if nlcr && !allowNLCR {
if r == '\r' {
runes = append(runes, []rune(CR+resetCode)...)