Merge pull request #45 from slingamn/utf8_truncation.1
ircmsg: make truncation utf8-safe
This commit is contained in:
commit
78fec0a07e
@ -9,6 +9,7 @@ import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -34,17 +35,30 @@ const (
|
||||
// Sentinel errors returned when parsing or serializing IRC lines. Callers
// compare against these values directly, so the message strings are part of
// the observable behavior and are kept verbatim.
var (
	// ErrorLineIsEmpty indicates that the given IRC line was empty.
	ErrorLineIsEmpty = errors.New("Line is empty")

	// ErrorLineContainsBadChar indicates that the line contained invalid characters
	ErrorLineContainsBadChar = errors.New("Line contains invalid characters")

	// ErrorLineTooLong indicates that the message exceeded the maximum tag length
	// (the name references 417 ERR_INPUTTOOLONG; we reserve the right to return it
	// for messages that exceed the non-tag length limit)
	//
	// NOTE(review): the call sites visible in this change return
	// ErrorTagsTooLong instead; confirm whether this sentinel is still needed
	// before removing it, since it is exported.
	ErrorLineTooLong = errors.New("Line could not be parsed because a specified length limit was exceeded")

	// ErrorBodyTooLong indicates that the message body exceeded the specified
	// length limit (typically 512 bytes). This error is non-fatal; if encountered
	// when parsing a message, the message is parsed up to the length limit, and
	// if encountered when serializing a message, the message is truncated to the limit.
	ErrorBodyTooLong = errors.New("Line body exceeded the specified length limit; outgoing messages will be truncated")

	// ErrorTagsTooLong indicates that the message exceeded the maximum tag length
	// (the specified response on the server side is 417 ERR_INPUTTOOLONG).
	ErrorTagsTooLong = errors.New("Line could not be processed because its tag data exceeded the length limit")

	// ErrorInvalidTagContent indicates that a tag name or value was invalid
	ErrorInvalidTagContent = errors.New("Line could not be processed because it contained an invalid tag name or value")

	// ErrorCommandMissing indicates that an IRC message was invalid because it lacked a command.
	ErrorCommandMissing = errors.New("IRC messages MUST have a command")

	// ErrorBadParam indicates that an IRC message could not be serialized because
	// its parameters violated the syntactic constraints on IRC parameters:
	// non-final parameters cannot be empty, contain a space, or start with `:`.
	ErrorBadParam = errors.New("Cannot have an empty param, a param with spaces, or a param that starts with ':' before the last parameter")
)
|
||||
|
||||
// IRCMessage represents an IRC message, as defined by the RFCs and as
|
||||
@ -148,28 +162,36 @@ func ParseLine(line string) (ircmsg IRCMessage, err error) {
|
||||
// ParseLineStrict creates and returns an IRCMessage from the given IRC line,
|
||||
// taking the maximum length into account and truncating the message as appropriate.
|
||||
// If fromClient is true, it enforces the client limit on tag data length (4094 bytes),
|
||||
// allowing the server to return ERR_INPUTTOOLONG as appropriate. If truncateLen is
|
||||
// allowing the server to return ERR_INPUTTOOLONG as appropriate. If maxLenBody is
|
||||
// nonzero, it is the length at which the non-tag portion of the message is truncated.
|
||||
func ParseLineStrict(line string, fromClient bool, truncateLen int) (ircmsg IRCMessage, err error) {
|
||||
func ParseLineStrict(line string, fromClient bool, maxLenBody int) (ircmsg IRCMessage, err error) {
|
||||
maxTagDataLength := MaxlenTagData
|
||||
if fromClient {
|
||||
maxTagDataLength = MaxlenClientTagData
|
||||
}
|
||||
return parseLine(line, maxTagDataLength, truncateLen)
|
||||
return parseLine(line, maxTagDataLength, maxLenBody)
|
||||
}
|
||||
|
||||
// trimInitialSpaces slices off any number of leading ' ' (U+0020) bytes from
// str and returns the remainder. Only the space character is stripped; tabs
// and other whitespace are left in place. An all-space or empty input yields
// the empty string.
func trimInitialSpaces(str string) string {
	return strings.TrimLeft(str, " ")
}
|
||||
|
||||
func parseLine(line string, maxTagDataLength int, truncateLen int) (ircmsg IRCMessage, err error) {
|
||||
func parseLine(line string, maxTagDataLength int, maxLenBody int) (ircmsg IRCMessage, err error) {
|
||||
// remove either \n or \r\n from the end of the line:
|
||||
line = strings.TrimSuffix(line, "\n")
|
||||
line = strings.TrimSuffix(line, "\r")
|
||||
// whether we removed them ourselves, or whether they were removed previously,
|
||||
// they count against the line limit:
|
||||
if maxLenBody != 0 {
|
||||
if maxLenBody <= 2 {
|
||||
return ircmsg, ErrorLineIsEmpty
|
||||
}
|
||||
maxLenBody -= 2
|
||||
}
|
||||
// now validate for the 3 forbidden bytes:
|
||||
if strings.IndexByte(line, '\x00') != -1 || strings.IndexByte(line, '\n') != -1 || strings.IndexByte(line, '\r') != -1 {
|
||||
return ircmsg, ErrorLineContainsBadChar
|
||||
@ -187,7 +209,7 @@ func parseLine(line string, maxTagDataLength int, truncateLen int) (ircmsg IRCMe
|
||||
}
|
||||
tags := line[1:tagEnd]
|
||||
if 0 < maxTagDataLength && maxTagDataLength < len(tags) {
|
||||
return ircmsg, ErrorLineTooLong
|
||||
return ircmsg, ErrorTagsTooLong
|
||||
}
|
||||
err = ircmsg.parseTags(tags)
|
||||
if err != nil {
|
||||
@ -198,8 +220,8 @@ func parseLine(line string, maxTagDataLength int, truncateLen int) (ircmsg IRCMe
|
||||
}
|
||||
|
||||
// truncate if desired
|
||||
if 0 < truncateLen && truncateLen < len(line) {
|
||||
line = line[:truncateLen]
|
||||
if maxLenBody != 0 && maxLenBody < len(line) {
|
||||
err = ErrorBodyTooLong
|
||||
}
|
||||
|
||||
// modern: "These message parts, and parameters themselves, are separated
|
||||
@ -252,7 +274,7 @@ func parseLine(line string, maxTagDataLength int, truncateLen int) (ircmsg IRCMe
|
||||
line = line[paramEnd+1:]
|
||||
}
|
||||
|
||||
return ircmsg, nil
|
||||
return ircmsg, err
|
||||
}
|
||||
|
||||
// helper to parse tags
|
||||
@ -337,8 +359,8 @@ func paramRequiresTrailing(param string) bool {
|
||||
}
|
||||
|
||||
// line returns a sendable line created from an IRCMessage.
|
||||
func (ircmsg *IRCMessage) line(tagLimit, clientOnlyTagDataLimit, serverAddedTagDataLimit, truncateLen int) ([]byte, error) {
|
||||
if len(ircmsg.Command) < 1 {
|
||||
func (ircmsg *IRCMessage) line(tagLimit, clientOnlyTagDataLimit, serverAddedTagDataLimit, truncateLen int) (result []byte, err error) {
|
||||
if len(ircmsg.Command) == 0 {
|
||||
return nil, ErrorCommandMissing
|
||||
}
|
||||
|
||||
@ -382,10 +404,10 @@ func (ircmsg *IRCMessage) line(tagLimit, clientOnlyTagDataLimit, serverAddedTagD
|
||||
lenTags = buf.Len()
|
||||
|
||||
if 0 < tagLimit && tagLimit < buf.Len() {
|
||||
return nil, ErrorLineTooLong
|
||||
return nil, ErrorTagsTooLong
|
||||
}
|
||||
if (0 < clientOnlyTagDataLimit && clientOnlyTagDataLimit < lenClientOnlyTags) || (0 < serverAddedTagDataLimit && serverAddedTagDataLimit < lenRegularTags) {
|
||||
return nil, ErrorLineTooLong
|
||||
return nil, ErrorTagsTooLong
|
||||
}
|
||||
|
||||
if len(ircmsg.Prefix) > 0 {
|
||||
@ -408,18 +430,33 @@ func (ircmsg *IRCMessage) line(tagLimit, clientOnlyTagDataLimit, serverAddedTagD
|
||||
buf.WriteString(param)
|
||||
}
|
||||
|
||||
// truncate if desired
|
||||
// -2 for \r\n
|
||||
restLen := buf.Len() - lenTags
|
||||
if 0 < truncateLen && (truncateLen-2) < restLen {
|
||||
buf.Truncate(lenTags + (truncateLen - 2))
|
||||
// truncate if desired; leave 2 bytes over for \r\n:
|
||||
if truncateLen != 0 && (truncateLen-2) < (buf.Len()-lenTags) {
|
||||
err = ErrorBodyTooLong
|
||||
newBufLen := lenTags + (truncateLen - 2)
|
||||
buf.Truncate(newBufLen)
|
||||
// XXX: we may have truncated in the middle of a UTF8-encoded codepoint;
|
||||
// if so, remove additional bytes, stopping when the sequence either
|
||||
// ends in a valid codepoint, or we have removed 3 bytes (the maximum
|
||||
// length of the remnant of a once-valid, truncated codepoint; we don't
|
||||
// want to truncate the entire message if it wasn't UTF8 in the first
|
||||
// place).
|
||||
for i := 0; i < (utf8.UTFMax - 1); i++ {
|
||||
r, n := utf8.DecodeLastRune(buf.Bytes())
|
||||
if r == utf8.RuneError && n <= 1 {
|
||||
newBufLen--
|
||||
buf.Truncate(newBufLen)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
buf.WriteString("\r\n")
|
||||
|
||||
result := buf.Bytes()
|
||||
result = buf.Bytes()
|
||||
toValidate := result[:len(result)-2]
|
||||
if bytes.IndexByte(toValidate, '\x00') != -1 || bytes.IndexByte(toValidate, '\r') != -1 || bytes.IndexByte(toValidate, '\n') != -1 {
|
||||
return nil, ErrorLineContainsBadChar
|
||||
}
|
||||
return result, nil
|
||||
return result, err
|
||||
}
|
||||
|
@ -1,9 +1,12 @@
|
||||
package ircmsg
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type testcode struct {
|
||||
@ -11,30 +14,29 @@ type testcode struct {
|
||||
message IRCMessage
|
||||
}
|
||||
type testcodewithlen struct {
|
||||
raw string
|
||||
length int
|
||||
message IRCMessage
|
||||
raw string
|
||||
length int
|
||||
message IRCMessage
|
||||
truncateExpected bool
|
||||
}
|
||||
|
||||
var decodelentests = []testcodewithlen{
|
||||
{":dan-!d@localhost PRIVMSG dan #test :What a cool message\r\n", 20,
|
||||
MakeMessage(nil, "dan-!d@localhost", "PR")},
|
||||
{":dan-!d@localhost PRIVMSG dan #test :What a cool message\r\n", 22,
|
||||
MakeMessage(nil, "dan-!d@localhost", "PRIVMSG", "dan", "#test", "What a cool message"), true},
|
||||
{"@time=12732;re TEST *\r\n", 512,
|
||||
MakeMessage(map[string]string{"time": "12732", "re": ""}, "", "TEST", "*")},
|
||||
MakeMessage(map[string]string{"time": "12732", "re": ""}, "", "TEST", "*"), false},
|
||||
{"@time=12732;re TEST *\r\n", 512,
|
||||
MakeMessage(map[string]string{"time": "12732", "re": ""}, "", "TEST", "*")},
|
||||
MakeMessage(map[string]string{"time": "12732", "re": ""}, "", "TEST", "*"), false},
|
||||
{":dan- TESTMSG\r\n", 2048,
|
||||
MakeMessage(nil, "dan-", "TESTMSG")},
|
||||
{":dan- TESTMSG dan \r\n", 12,
|
||||
MakeMessage(nil, "dan-", "TESTMS")},
|
||||
MakeMessage(nil, "dan-", "TESTMSG"), false},
|
||||
{"TESTMSG\r\n", 6,
|
||||
MakeMessage(nil, "", "TESTMS")},
|
||||
MakeMessage(nil, "", "TESTMSG"), true},
|
||||
{"TESTMSG\r\n", 7,
|
||||
MakeMessage(nil, "", "TESTMSG")},
|
||||
MakeMessage(nil, "", "TESTMSG"), true},
|
||||
{"TESTMSG\r\n", 8,
|
||||
MakeMessage(nil, "", "TESTMSG")},
|
||||
MakeMessage(nil, "", "TESTMSG"), true},
|
||||
{"TESTMSG\r\n", 9,
|
||||
MakeMessage(nil, "", "TESTMSG")},
|
||||
MakeMessage(nil, "", "TESTMSG"), false},
|
||||
}
|
||||
|
||||
// map[string]string{"time": "12732", "re": ""}
|
||||
@ -100,15 +102,22 @@ var decodetesterrors = []testparseerror{
|
||||
{"privmsg #channel :command injection attempt \r:Nickserv PRIVMSG user :Please re-enter your password", ErrorLineContainsBadChar},
|
||||
}
|
||||
|
||||
func validateTruncateError(pair testcodewithlen, err error, t *testing.T) {
|
||||
if pair.truncateExpected {
|
||||
if err != ErrorBodyTooLong {
|
||||
t.Error("For", pair.raw, "expected truncation, but got error", err)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Error("For", pair.raw, "expected no error, but got", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecode(t *testing.T) {
|
||||
for _, pair := range decodelentests {
|
||||
ircmsg, err := ParseLineStrict(pair.raw, true, pair.length)
|
||||
if err != nil {
|
||||
t.Error(
|
||||
"For", pair.raw,
|
||||
"Failed to parse line:", err,
|
||||
)
|
||||
}
|
||||
validateTruncateError(pair, err, t)
|
||||
|
||||
if !reflect.DeepEqual(ircmsg, pair.message) {
|
||||
t.Error(
|
||||
@ -159,11 +168,11 @@ var encodetests = []testcode{
|
||||
}
|
||||
var encodelentests = []testcodewithlen{
|
||||
{":dan-!d@lo\r\n", 12,
|
||||
MakeMessage(nil, "dan-!d@localhost", "PRIVMSG", "dan", "#test", "What a cool message")},
|
||||
MakeMessage(nil, "dan-!d@localhost", "PRIVMSG", "dan", "#test", "What a cool message"), true},
|
||||
{"@time=12732 TEST *\r\n", 52,
|
||||
MakeMessage(map[string]string{"time": "12732"}, "", "TEST", "*")},
|
||||
MakeMessage(map[string]string{"time": "12732"}, "", "TEST", "*"), false},
|
||||
{"@riohwihowihirgowihre TEST *\r\n", 8,
|
||||
MakeMessage(map[string]string{"riohwihowihirgowihre": ""}, "", "TEST", "*", "*")},
|
||||
MakeMessage(map[string]string{"riohwihowihirgowihre": ""}, "", "TEST", "*", "*"), true},
|
||||
}
|
||||
|
||||
func TestEncode(t *testing.T) {
|
||||
@ -203,12 +212,7 @@ func TestEncode(t *testing.T) {
|
||||
}
|
||||
for _, pair := range encodelentests {
|
||||
line, err := pair.message.LineBytesStrict(true, pair.length)
|
||||
if err != nil {
|
||||
t.Error(
|
||||
"For", pair.raw,
|
||||
"Failed to parse line:", err,
|
||||
)
|
||||
}
|
||||
validateTruncateError(pair, err, t)
|
||||
|
||||
if string(line) != pair.raw {
|
||||
t.Error(
|
||||
@ -373,7 +377,7 @@ func TestErrorLineTooLongGeneration(t *testing.T) {
|
||||
message.SetTag(fmt.Sprintf("+client-tag-%d", i), "ok")
|
||||
}
|
||||
line, err = message.LineBytesStrict(true, 0)
|
||||
if err != ErrorLineTooLong {
|
||||
if err != ErrorTagsTooLong {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@ -382,7 +386,7 @@ func TestErrorLineTooLongGeneration(t *testing.T) {
|
||||
message.SetTag(fmt.Sprintf("server-tag-%d", i), "ok")
|
||||
}
|
||||
line, err = message.LineBytesStrict(true, 0)
|
||||
if err != ErrorLineTooLong {
|
||||
if err != ErrorTagsTooLong {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@ -394,7 +398,7 @@ func TestErrorLineTooLongGeneration(t *testing.T) {
|
||||
}
|
||||
// client cannot send this much tag data:
|
||||
line, err = message.LineBytesStrict(true, 0)
|
||||
if err != ErrorLineTooLong {
|
||||
if err != ErrorTagsTooLong {
|
||||
t.Error(err)
|
||||
}
|
||||
// but a server can, since the tags are split between client and server budgets:
|
||||
@ -404,6 +408,109 @@ func TestErrorLineTooLongGeneration(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// truncateTests holds one sample codepoint for each possible UTF-8 encoded
// length, ordered so that the entry at index i occupies i+1 bytes.
var truncateTests = []string{
	"x",  // U+0078, Latin Small Letter X, 1 byte
	"ç",  // U+00E7, Latin Small Letter C with Cedilla, 2 bytes
	"ꙮ",  // U+A66E, Cyrillic Letter Multiocular O, 3 bytes
	"🐬", // U+1F42C, Dolphin, 4 bytes
}
|
||||
|
||||
// assertEqual panics unless found and expected are deeply equal according to
// reflect.DeepEqual. The panic message shows both values in Go syntax (%#v).
// It takes no *testing.T, so a mismatch aborts by panicking rather than
// failing a single test.
func assertEqual(found, expected interface{}) {
	if reflect.DeepEqual(found, expected) {
		return
	}
	panic(fmt.Sprintf("expected %#v, found %#v", expected, found))
}
|
||||
|
||||
// buildPingParam returns a string made of initialLen 'a' bytes followed by
// repetitions of encChar. encChar is appended while the byte length is still
// <= minLen, so the result is always strictly longer than minLen whenever at
// least one repetition is added, and no repetition is added if the 'a' prefix
// alone already exceeds minLen.
func buildPingParam(initialLen, minLen int, encChar string) (result string) {
	var out strings.Builder
	for i := 0; i < initialLen; i++ {
		out.WriteByte('a')
	}
	for out.Len() <= minLen {
		out.WriteString(encChar)
	}
	return out.String()
}
|
||||
|
||||
// min returns the smaller of i and j (j when they are equal).
// It is a package-local helper; on Go 1.21+ it shadows the built-in min.
func min(i, j int) int {
	if i < j {
		return i
	}
	return j
}
|
||||
|
||||
func TestTruncate(t *testing.T) {
|
||||
// OK, this test is weird: we're going to build a line with a final parameter
|
||||
// that consists of a bunch of a's, then some nonzero number of repetitions
|
||||
// of a different UTF8-encoded codepoint. we'll test all 4 possible lengths
|
||||
// for a codepoint, and a number of different alignments for the codepoint
|
||||
// relative to the 512-byte boundary. in all cases, we should produce valid
|
||||
// UTF8, and truncate at most 3 bytes below the 512-byte boundary.
|
||||
for idx, s := range truncateTests {
|
||||
// sanity check that we have the expected lengths:
|
||||
assertEqual(len(s), idx+1)
|
||||
r, _ := utf8.DecodeRuneInString(s)
|
||||
if r == utf8.RuneError {
|
||||
panic("invalid codepoint in test suite")
|
||||
}
|
||||
|
||||
// "PING [param]\r\n", max parameter size is 512-7=505 bytes
|
||||
for initialLen := 490; initialLen < 500; initialLen++ {
|
||||
for i := 1; i < 50; i++ {
|
||||
param := buildPingParam(initialLen, initialLen+i, s)
|
||||
msg := MakeMessage(nil, "", "PING", param)
|
||||
msgBytes, err := msg.LineBytesStrict(false, 512)
|
||||
msgBytesNonTrunc, _ := msg.LineBytes()
|
||||
if len(msgBytes) == len(msgBytesNonTrunc) {
|
||||
if err != nil {
|
||||
t.Error("message was not truncated, but got error", err)
|
||||
}
|
||||
} else {
|
||||
if err != ErrorBodyTooLong {
|
||||
t.Error("message was truncated, but got error", err)
|
||||
}
|
||||
}
|
||||
if len(msgBytes) > 512 {
|
||||
t.Errorf("invalid serialized length %d", len(msgBytes))
|
||||
}
|
||||
if len(msgBytes) < min(512-3, len(msgBytesNonTrunc)) {
|
||||
t.Errorf("invalid serialized length %d", len(msgBytes))
|
||||
}
|
||||
if !utf8.Valid(msgBytes) {
|
||||
t.Errorf("PING %s encoded to invalid UTF8: %#v\n", param, msgBytes)
|
||||
}
|
||||
// skip over "PING "
|
||||
first, _ := utf8.DecodeRune(msgBytes[5:])
|
||||
assertEqual(first, rune('a'))
|
||||
last, _ := utf8.DecodeLastRune(bytes.TrimSuffix(msgBytes, []byte("\r\n")))
|
||||
assertEqual(last, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateNonUTF8(t *testing.T) {
|
||||
for l := 490; l < 530; l++ {
|
||||
var buf strings.Builder
|
||||
for i := 0; i < l; i++ {
|
||||
buf.WriteByte('\xff')
|
||||
}
|
||||
param := buf.String()
|
||||
msg := MakeMessage(nil, "", "PING", param)
|
||||
msgBytes, err := msg.LineBytesStrict(false, 512)
|
||||
if !(err == nil || err == ErrorBodyTooLong) {
|
||||
panic(err)
|
||||
}
|
||||
if len(msgBytes) > 512 {
|
||||
t.Errorf("invalid serialized length %d", len(msgBytes))
|
||||
}
|
||||
// full length is "PING <param>\r\n", 7+len(param)
|
||||
if len(msgBytes) < min(512-3, 7+len(param)) {
|
||||
t.Errorf("invalid serialized length %d", len(msgBytes))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGenerate(b *testing.B) {
|
||||
msg := MakeMessage(
|
||||
map[string]string{"time": "2019-02-28T08:12:43.480Z", "account": "shivaram"},
|
||||
|
Loading…
Reference in New Issue
Block a user