add UTF8-safe truncation utility function

This commit is contained in:
Shivaram Lingamneni 2021-03-18 03:43:31 -04:00
parent f3d1c7c294
commit debcdc124d
2 changed files with 58 additions and 0 deletions

25
ircutils/unicode.go Normal file

@ -0,0 +1,25 @@
// Copyright (c) 2021 Shivaram Lingamneni
// Released under the MIT License
package ircutils
import (
"unicode/utf8"
)
// TruncateUTF8Safe truncates message to at most byteLimit bytes, taking care
// not to turn valid UTF-8 into invalid UTF-8 by cutting a multi-byte rune in
// half.
//
// If message already fits within byteLimit it is returned unchanged (even if
// it contains invalid UTF-8). Otherwise it is cut at byteLimit and then up to
// utf8.UTFMax-1 trailing bytes that do not decode as a complete rune are
// removed. Note that this trimming cannot tell a rune split by the cut apart
// from bytes that were already invalid in the input, so up to utf8.UTFMax-1
// trailing invalid bytes may be dropped as well.
//
// A negative byteLimit is treated as zero, so the result is the empty string.
func TruncateUTF8Safe(message string, byteLimit int) (result string) {
	if byteLimit < 0 {
		byteLimit = 0
	}
	if len(message) <= byteLimit {
		return message
	}
	message = message[:byteLimit]
	// A rune split by the cut leaves at most utf8.UTFMax-1 dangling bytes, so
	// that bounds the number of bytes we ever need to strip. The length check
	// matters: DecodeLastRuneInString reports (RuneError, 0) for an empty
	// string, and slicing off another byte at that point would panic.
	for i := 0; i < utf8.UTFMax-1 && len(message) > 0; i++ {
		r, n := utf8.DecodeLastRuneInString(message)
		if r != utf8.RuneError || n > 1 {
			// The string ends in a complete rune (possibly a legitimately
			// encoded U+FFFD, which has width 3); nothing more to trim.
			break
		}
		message = message[:len(message)-1]
	}
	return message
}

33
ircutils/unicode_test.go Normal file

@ -0,0 +1,33 @@
// Copyright (c) 2021 Shivaram Lingamneni
// Released under the MIT License
package ircutils
import (
"fmt"
"reflect"
"testing"
)
// assertEqual compares found against expected with reflect.DeepEqual and
// panics with a message showing both values (in Go syntax) when they differ.
func assertEqual(found, expected interface{}) {
	if reflect.DeepEqual(found, expected) {
		return
	}
	panic(fmt.Sprintf("expected %#v, found %#v", expected, found))
}
// TestTruncateUTF8 feeds TruncateUTF8Safe a table of inputs and byte limits
// and checks each result with assertEqual, which panics on the first mismatch.
func TestTruncateUTF8(t *testing.T) {
	cases := []struct {
		input string
		limit int
		want  string
	}{
		{"fffff", 512, "fffff"},
		{"fffff", 5, "fffff"},
		{"ffffff", 5, "fffff"},
		{"ffffffffff", 5, "fffff"},
		{"12345🐬", 9, "12345🐬"},
		{"12345🐬", 8, "12345"},
		{"12345🐬", 7, "12345"},
		{"12345🐬", 6, "12345"},
		{"12345", 5, "12345"},
		{"\xff\xff\xff\xff\xff\xff", 512, "\xff\xff\xff\xff\xff\xff"},
		{"\xff\xff\xff\xff\xff\xff", 6, "\xff\xff\xff\xff\xff\xff"},
		// shouldn't truncate the whole string
		{"\xff\xff\xff\xff\xff\xff", 5, "\xff\xff"},
	}
	for _, c := range cases {
		assertEqual(TruncateUTF8Safe(c.input, c.limit), c.want)
	}
}