add UTF8-safe truncation utility function
This commit is contained in:
parent
f3d1c7c294
commit
debcdc124d
25
ircutils/unicode.go
Normal file
25
ircutils/unicode.go
Normal file
@ -0,0 +1,25 @@
|
||||
// Copyright (c) 2021 Shivaram Lingamneni
|
||||
// Released under the MIT License
|
||||
|
||||
package ircutils
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TruncateUTF8Safe truncates message to at most byteLimit bytes, taking care
// not to make valid UTF8 into invalid UTF8: when the cut lands in the middle
// of a multi-byte sequence, the dangling partial bytes are dropped as well.
//
// A message that already fits within byteLimit is returned unchanged, even if
// it contains invalid UTF8. A byteLimit of zero or less yields the empty
// string. At most utf8.UTFMax-1 trailing bytes are removed after the cut, so
// a message made up of invalid bytes is not trimmed away entirely unless the
// limit itself is that small.
func TruncateUTF8Safe(message string, byteLimit int) (result string) {
	if len(message) <= byteLimit {
		return message
	}
	// Nothing can fit; this also keeps the slice expression below from
	// panicking on a negative limit.
	if byteLimit <= 0 {
		return ""
	}
	message = message[:byteLimit]
	// A truncated multi-byte sequence leaves at most UTFMax-1 stray bytes at
	// the end. Stop as soon as the string is empty: decoding "" reports
	// (RuneError, 0), which would otherwise lead to slicing at -1.
	for i := 0; i < (utf8.UTFMax-1) && len(message) > 0; i++ {
		r, n := utf8.DecodeLastRuneInString(message)
		// RuneError with a width above 1 is a correctly encoded U+FFFD, which
		// is valid UTF8 and must be kept.
		if r != utf8.RuneError || n > 1 {
			break
		}
		message = message[:len(message)-1]
	}
	return message
}
|
33
ircutils/unicode_test.go
Normal file
33
ircutils/unicode_test.go
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright (c) 2021 Shivaram Lingamneni
|
||||
// Released under the MIT License
|
||||
|
||||
package ircutils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// assertEqual panics with a message showing both values unless found and
// expected are deeply equal as judged by reflect.DeepEqual.
func assertEqual(found, expected interface{}) {
	if reflect.DeepEqual(found, expected) {
		return
	}
	mismatch := fmt.Sprintf("expected %#v, found %#v", expected, found)
	panic(mismatch)
}
|
||||
func TestTruncateUTF8(t *testing.T) {
|
||||
assertEqual(TruncateUTF8Safe("fffff", 512), "fffff")
|
||||
assertEqual(TruncateUTF8Safe("fffff", 5), "fffff")
|
||||
assertEqual(TruncateUTF8Safe("ffffff", 5), "fffff")
|
||||
assertEqual(TruncateUTF8Safe("ffffffffff", 5), "fffff")
|
||||
|
||||
assertEqual(TruncateUTF8Safe("12345🐬", 9), "12345🐬")
|
||||
assertEqual(TruncateUTF8Safe("12345🐬", 8), "12345")
|
||||
assertEqual(TruncateUTF8Safe("12345🐬", 7), "12345")
|
||||
assertEqual(TruncateUTF8Safe("12345🐬", 6), "12345")
|
||||
assertEqual(TruncateUTF8Safe("12345", 5), "12345")
|
||||
|
||||
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 512), "\xff\xff\xff\xff\xff\xff")
|
||||
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 6), "\xff\xff\xff\xff\xff\xff")
|
||||
// shouldn't truncate the whole string
|
||||
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 5), "\xff\xff")
|
||||
}
|
Loading…
Reference in New Issue
Block a user