// Copyright (c) 2021 Shivaram Lingamneni
// Released under the MIT License

// ---- ircutils/unicode.go ----

// TruncateUTF8Safe shortens message to at most byteLimit bytes, taking care
// not to turn valid UTF-8 into invalid UTF-8 by cutting a multi-byte rune in
// half.
//
// If message already fits within byteLimit it is returned unchanged. Otherwise
// it is cut at byteLimit bytes and then up to utf8.UTFMax-1 trailing bytes
// that do not decode as a complete rune are dropped. Only the tail is
// examined; the rest of the string is not validated, so input that was
// already invalid loses at most utf8.UTFMax-1 extra bytes rather than being
// truncated entirely.
//
// A byteLimit of zero or less yields the empty string.
func TruncateUTF8Safe(message string, byteLimit int) (result string) {
	if len(message) <= byteLimit {
		return message
	}
	// A negative limit would make the slice expression below panic, and a
	// zero limit leaves nothing to trim.
	if byteLimit <= 0 {
		return ""
	}
	message = message[:byteLimit]
	// The len(message) > 0 guard matters: DecodeLastRuneInString("") reports
	// (RuneError, 0), which would otherwise be treated as one more invalid
	// trailing byte and slice past the start of the string.
	for i := 0; i < utf8.UTFMax-1 && len(message) > 0; i++ {
		r, n := utf8.DecodeLastRuneInString(message)
		// A width above 1 alongside RuneError means a correctly encoded
		// U+FFFD, which is valid and must be kept.
		if r != utf8.RuneError || n > 1 {
			break
		}
		message = message[:len(message)-1]
	}
	return message
}

// ---- ircutils/unicode_test.go ----

// assertEqual panics with a message showing both values when found and
// expected are not deeply equal according to reflect.DeepEqual.
func assertEqual(found, expected interface{}) {
	if !reflect.DeepEqual(found, expected) {
		panic(fmt.Sprintf("expected %#v, found %#v", expected, found))
	}
}

// TestTruncateUTF8 exercises TruncateUTF8Safe on ASCII input, on input ending
// in a four-byte rune cut at every possible offset, and on invalid UTF-8.
func TestTruncateUTF8(t *testing.T) {
	assertEqual(TruncateUTF8Safe("fffff", 512), "fffff")
	assertEqual(TruncateUTF8Safe("fffff", 5), "fffff")
	assertEqual(TruncateUTF8Safe("ffffff", 5), "fffff")
	assertEqual(TruncateUTF8Safe("ffffffffff", 5), "fffff")

	assertEqual(TruncateUTF8Safe("12345🐬", 9), "12345🐬")
	assertEqual(TruncateUTF8Safe("12345🐬", 8), "12345")
	assertEqual(TruncateUTF8Safe("12345🐬", 7), "12345")
	assertEqual(TruncateUTF8Safe("12345🐬", 6), "12345")
	assertEqual(TruncateUTF8Safe("12345", 5), "12345")

	assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 512), "\xff\xff\xff\xff\xff\xff")
	assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 6), "\xff\xff\xff\xff\xff\xff")
	// shouldn't truncate the whole string
	assertEqual(TruncateUTF8Safe("\xff\xff\xff\xff\xff\xff", 5), "\xff\xff")

	// trimming may consume the entire string; this must not panic
	assertEqual(TruncateUTF8Safe("🐬", 3), "")
	assertEqual(TruncateUTF8Safe("🐬", 2), "")
	assertEqual(TruncateUTF8Safe("🐬", 1), "")

	// non-positive limits yield the empty string
	assertEqual(TruncateUTF8Safe("abc", 0), "")
	assertEqual(TruncateUTF8Safe("abc", -1), "")
	assertEqual(TruncateUTF8Safe("", 0), "")
}