improves HTTP body decoding and enforces max length (#295)

https://github.com/zmap/zgrab2/pull/295
This commit is contained in:
Benjamin Wireman 2021-02-03 10:56:32 -05:00 committed by GitHub
parent 3613392bf0
commit d9ed4f141d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -425,25 +425,36 @@ func (scan *scan) Grab() *zgrab2.ScanError {
if resp.ContentLength >= 0 && resp.ContentLength < maxReadLen {
readLen = resp.ContentLength
}
// The error from CopyN (including EOF) is deliberately ignored, preserving the earlier behavior; CopyN copies at most readLen bytes
bytesRead, _ := io.CopyN(buf, resp.Body, readLen)
if scan.scanner.config.WithBodyLength {
scan.results.Response.BodyTextLength = bytesRead
}
bufAsString := buf.String()
io.CopyN(buf, resp.Body, readLen)
encoder, encoding, certain := charset.DetermineEncoding(buf.Bytes(), resp.Header.Get("content-type"))
// make a best-effort attempt to determine the response's encoding;
// ignore the certainty flag and use whatever encoding was detected
encoder, _, _ := charset.DetermineEncoding(buf.Bytes(), resp.Header.Get("content_type"))
bodyText := ""
decodedSuccessfully := false
decoder := encoder.NewDecoder()
//"windows-1252" is the default value and will likely not decode correctly
if certain || encoding != "windows-1252" {
decoded, decErr := decoder.Bytes(buf.Bytes())
decoded, decErr := decoder.String(bufAsString)
if decErr == nil {
bodyText = string(decoded)
decodedSuccessfully = true
}
}
// if the decoder returns an error, fall back to using the raw buffer as a string
if decErr == nil {
scan.results.Response.BodyText = decoded
if !decodedSuccessfully {
bodyText = buf.String()
}
// re-enforce readLen: the decoded text may be longer than readLen bytes, so truncate it
if int64(len(bodyText)) > readLen {
scan.results.Response.BodyText = bodyText[:int(readLen)]
} else {
scan.results.Response.BodyText = bufAsString
scan.results.Response.BodyText = bodyText
}
if scan.scanner.config.WithBodyLength {
scan.results.Response.BodyTextLength = int64(len(scan.results.Response.BodyText))
}
if len(scan.results.Response.BodyText) > 0 {