ircutils: add SanitizeText

This commit is contained in:
Shivaram Lingamneni 2021-04-23 11:30:37 -04:00
parent 4b81d501cc
commit 97bbb1c210
2 changed files with 50 additions and 0 deletions

View File

@ -4,6 +4,8 @@
package ircutils
import (
"strings"
"unicode"
"unicode/utf8"
)
@ -23,3 +25,38 @@ func TruncateUTF8Safe(message string, byteLimit int) (result string) {
}
return message
}
// SanitizeText makes human-readable text safe to send over IRC. The input is
// treated as UTF-8 and the output never exceeds byteLimit bytes:
//
//   - NUL and carriage return are dropped;
//   - a line feed is replaced by two spaces;
//   - any other Unicode whitespace is replaced by a single space;
//   - invalid UTF-8 bytes are replaced by U+FFFD, the replacement character;
//   - every other rune is copied through unchanged.
//
// Output stops at the first rune (or replacement) that would not fit in the
// remaining budget, so a multi-byte sequence is never split. A byteLimit of
// zero or less yields the empty string.
func SanitizeText(message string, byteLimit int) (result string) {
	// The budget check for a line feed is sized from this constant so that the
	// bytes reserved and the bytes written cannot drift apart.
	const newlineReplacement = "  "

	var buf strings.Builder
	for _, r := range message {
		switch {
		case r == '\x00' || r == '\r':
			// Dropped outright; costs nothing against the byte budget.
		case r == '\n':
			if buf.Len()+len(newlineReplacement) > byteLimit {
				return buf.String()
			}
			buf.WriteString(newlineReplacement)
		case unicode.IsSpace(r):
			if buf.Len()+1 > byteLimit {
				return buf.String()
			}
			buf.WriteString(" ")
		default:
			// Ranging over a string yields U+FFFD for each invalid byte, so
			// the replacement character is budgeted at its encoded length.
			if buf.Len()+utf8.RuneLen(r) > byteLimit {
				return buf.String()
			}
			buf.WriteRune(r)
		}
	}
	return buf.String()
}

View File

@ -31,3 +31,16 @@ func TestTruncateUTF8(t *testing.T) {
// shouldn't truncate the whole string
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 5), "\xff\xff")
}
// TestSanitize exercises SanitizeText: pass-through and truncation at the
// byte limit, removal of NUL and CR, substitution of invalid UTF-8, and
// replacement of line feeds.
func TestSanitize(t *testing.T) {
	// short input is returned unchanged; long input is cut at the byte limit
	assertEqual(SanitizeText("abc", 10), "abc")
	assertEqual(SanitizeText("abcdef", 5), "abcde")
	// NUL bytes are removed
	assertEqual(SanitizeText("shivaram\x00shivaram\x00shivarampassphrase", 400), "shivaramshivaramshivarampassphrase")
	// an invalid UTF-8 byte becomes U+FFFD (encoded as \xef\xbf\xbd)
	assertEqual(SanitizeText("the quick brown fox\xffjumps over the lazy dog", 400), "the quick brown fox\xef\xbf\xbdjumps over the lazy dog")
	// \r ignored, \n is two spaces
	assertEqual(SanitizeText("the quick brown fox\r\njumps over the lazy dog", 400), "the quick brown fox  jumps over the lazy dog")
	assertEqual(SanitizeText("the quick brown fox\njumps over the lazy dog", 400), "the quick brown fox  jumps over the lazy dog")
	// a \n whose replacement does not fit in the remaining budget ends the output
	assertEqual(SanitizeText("abc\ndef", 4), "abc")
}