Merge pull request #54 from slingamn/utf8_truncate
add UTF8-safe truncation utility function
This commit is contained in:
commit
bdc2c2cd2f
25
ircutils/unicode.go
Normal file
25
ircutils/unicode.go
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
// Copyright (c) 2021 Shivaram Lingamneni
|
||||||
|
// Released under the MIT License
|
||||||
|
|
||||||
|
package ircutils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TruncateUTF8Safe truncates message to at most byteLimit bytes, taking care
// not to turn valid UTF-8 into invalid UTF-8 by cutting a multi-byte rune in
// half.
//
// If message already fits within byteLimit it is returned unchanged, whether
// or not it is valid UTF-8. Otherwise it is cut at byteLimit, and then up to
// utf8.UTFMax-1 trailing bytes that do not decode as the end of a valid rune
// are dropped. Because that stripping is bounded, input that was already
// invalid is never consumed entirely; it may still end in invalid bytes.
//
// A byteLimit that is zero or negative yields the empty string.
func TruncateUTF8Safe(message string, byteLimit int) (result string) {
	if len(message) <= byteLimit {
		return message
	}
	if byteLimit <= 0 {
		// Nothing fits; also avoids slicing with a negative bound.
		return ""
	}
	message = message[:byteLimit]
	// A rune is at most utf8.UTFMax bytes long, so a rune split by the cut
	// leaves behind at most utf8.UTFMax-1 dangling bytes.
	for i := 0; i < utf8.UTFMax-1; i++ {
		r, n := utf8.DecodeLastRuneInString(message)
		if r != utf8.RuneError || n != 1 {
			// Either the string now ends in a valid rune (including a
			// correctly encoded U+FFFD, for which n == 3), or it is empty
			// (n == 0) and there is nothing left to strip.
			break
		}
		message = message[:len(message)-1]
	}
	return message
}
|
33
ircutils/unicode_test.go
Normal file
33
ircutils/unicode_test.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
// Copyright (c) 2021 Shivaram Lingamneni
|
||||||
|
// Released under the MIT License
|
||||||
|
|
||||||
|
package ircutils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// assertEqual panics unless found and expected are deeply equal according to
// reflect.DeepEqual. The panic message shows both values in Go syntax.
func assertEqual(found, expected interface{}) {
	if reflect.DeepEqual(found, expected) {
		return
	}
	panic(fmt.Sprintf("expected %#v, found %#v", expected, found))
}
|
||||||
|
func TestTruncateUTF8(t *testing.T) {
|
||||||
|
assertEqual(TruncateUTF8Safe("fffff", 512), "fffff")
|
||||||
|
assertEqual(TruncateUTF8Safe("fffff", 5), "fffff")
|
||||||
|
assertEqual(TruncateUTF8Safe("ffffff", 5), "fffff")
|
||||||
|
assertEqual(TruncateUTF8Safe("ffffffffff", 5), "fffff")
|
||||||
|
|
||||||
|
assertEqual(TruncateUTF8Safe("12345🐬", 9), "12345🐬")
|
||||||
|
assertEqual(TruncateUTF8Safe("12345🐬", 8), "12345")
|
||||||
|
assertEqual(TruncateUTF8Safe("12345🐬", 7), "12345")
|
||||||
|
assertEqual(TruncateUTF8Safe("12345🐬", 6), "12345")
|
||||||
|
assertEqual(TruncateUTF8Safe("12345", 5), "12345")
|
||||||
|
|
||||||
|
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 512), "\xff\xff\xff\xff\xff\xff")
|
||||||
|
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 6), "\xff\xff\xff\xff\xff\xff")
|
||||||
|
// shouldn't truncate the whole string
|
||||||
|
assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 5), "\xff\xff")
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user