zgrab2/lib/output/process.go
2018-03-30 16:28:24 -04:00

730 lines
20 KiB
Go

// Package output contains utilities for processing results from zgrab2 scanners
// for eventual output and consumption by ztag.
package output
import (
"reflect"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"unicode"
"unicode/utf8"
"github.com/sirupsen/logrus"
)
// ZGrabTag holds the options parsed from a struct field's `zgrab` tag.
type ZGrabTag struct {
	// Debug marks a field that should only be emitted when doing verbose
	// output.
	Debug bool
}

// parseZGrabTag parses the value of a `zgrab` struct tag into a ZGrabTag.
// The value is treated as a comma-separated list of options; whitespace
// around each option is ignored. "debug" is the only option recognized, and
// any other option is skipped without error.
func parseZGrabTag(value string) *ZGrabTag {
	parsed := new(ZGrabTag)
	for _, option := range strings.Split(value, ",") {
		if strings.TrimSpace(option) == "debug" {
			parsed.Debug = true
		}
	}
	return parsed
}
// Check if the type is primitive, or eventually points to a primitive type.
func isPrimitiveType(what reflect.Type) bool {
return isPrimitiveKind(dereferenceType(what).Kind())
}
// compoundKinds is the set of kinds that are considered non-primitive:
// structs, slices, arrays, maps and interfaces. Any kind that is not a key of
// this map is treated as primitive by isPrimitiveKind.
var compoundKinds = map[reflect.Kind]bool{
reflect.Struct: true,
reflect.Slice: true,
reflect.Array: true,
reflect.Map: true,
reflect.Interface: true,
}
// dereferenceType strips every level of pointer indirection from what and
// returns the first non-pointer type reached. Non-pointer types are returned
// unchanged.
func dereferenceType(what reflect.Type) reflect.Type {
	for what.Kind() == reflect.Ptr {
		what = what.Elem()
	}
	return what
}
// isPrimitiveKind reports whether kind is primitive, i.e. not listed in
// compoundKinds.
func isPrimitiveKind(kind reflect.Kind) bool {
	// A missing key yields false, so a plain lookup covers both "absent" and
	// "present but false".
	return !compoundKinds[kind]
}
// OutputProcessor holds the options and state for a processing run.
type OutputProcessor struct {
	// Verbose, when true, keeps fields tagged as debug in the output instead
	// of stripping them.
	Verbose bool
}

// NewOutputProcessor returns an OutputProcessor with the default settings
// (Verbose disabled).
func NewOutputProcessor() *OutputProcessor {
	defaults := OutputProcessor{Verbose: false}
	return &defaults
}
// Process returns a processed copy of v, built according to the options set on
// the OutputProcessor. If processing reports an error, the result is nil and
// that error is returned.
func (processor *OutputProcessor) Process(v interface{}) (interface{}, error) {
	processed, err := processor.process(v)
	if err == nil {
		return processed.Interface(), nil
	}
	return nil, err
}
// Process returns a processed copy of v using a processor with the default
// options, which strip debug fields.
func Process(v interface{}) (interface{}, error) {
	defaults := NewOutputProcessor()
	return defaults.Process(v)
}
// process is the internal entry point behind Process. It runs processValue on
// v and converts error-valued panics (the mechanism used by processor.error)
// into an ordinary error return.
//
// On success it returns the processed value and a nil error. When an error
// panic is recovered it returns the zero reflect.Value together with that
// error. Runtime errors, and any panic whose value is not an error (for
// example a string), are not handled here and are re-raised with their
// original value.
func (processor *OutputProcessor) process(v interface{}) (ret reflect.Value, err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if _, ok := r.(runtime.Error); ok {
			panic(r)
		}
		recovered, ok := r.(error)
		if !ok {
			// An unchecked r.(error) here would replace the caller's panic
			// value with an interface-conversion runtime error; re-panic
			// with the original value instead.
			panic(r)
		}
		ret = reflect.Value{}
		err = recovered
	}()
	// Taking the address and dereferencing yields a Value of kind Interface,
	// so that processing starts at the interface{} wrapper around v.
	return processor.processValue(reflect.ValueOf(&v).Elem()), nil
}
// error aborts the current processing run by panicking with err. The panic is
// expected to be recovered by process, which turns it back into a returned
// error.
func (processor *OutputProcessor) error(err error) {
	panic(err)
}
// processValue looks up the processorFunc for v and applies it, returning the
// processed copy of v.
func (processor *OutputProcessor) processValue(v reflect.Value) reflect.Value {
	handler := valueProcessor(v)
	return handler(processor, v)
}
// processorFunc is the signature shared by all value processors: it takes the
// OutputProcessor carrying the run's options and the value to process, and
// returns a processed copy of that value.
type processorFunc func(s *OutputProcessor, v reflect.Value) reflect.Value
// processorCache maps reflect.Type to processorFunc, and caches the processors
// for the various types. It is read and populated by typeProcessor.
var processorCache sync.Map
// valueProcessor returns the processorFunc to use for the concrete value v.
// Valid values are dispatched on their type; an invalid (zero) reflect.Value
// gets dupeProcessor.
func valueProcessor(v reflect.Value) processorFunc {
	if v.IsValid() {
		return typeProcessor(v.Type())
	}
	return dupeProcessor
}
// typeProcessor returns the processorFunc for type t. The processor is built
// with newTypeProcessor the first time t is seen and stored in processorCache,
// so later calls for the same type return the cached function.
func typeProcessor(t reflect.Type) processorFunc {
// Fast path: a processor for t is already in the cache.
if fi, ok := processorCache.Load(t); ok {
return fi.(processorFunc)
}
// To deal with recursive types, populate the map with an
// indirect func before we build it. This type waits on the
// real func (f) to be ready and then calls it. This indirect
// func is only used for recursive types.
var (
wg sync.WaitGroup
f processorFunc
)
wg.Add(1)
fi, loaded := processorCache.LoadOrStore(t, processorFunc(func(processor *OutputProcessor, v reflect.Value) reflect.Value {
wg.Wait()
return f(processor, v)
}))
if loaded {
// An entry for t was already present, so the placeholder above was not
// stored; use the existing entry.
return fi.(processorFunc)
}
// Compute the real processor and replace the indirect func with it.
f = newTypeProcessor(t)
// f is assigned before Done, so any placeholder call blocked in Wait sees
// the real processor once it is released.
wg.Done()
processorCache.Store(t, f)
return f
}
// newTypeProcessor builds a fresh processorFunc for type t, chosen by t's
// kind. Interfaces, structs, maps, slices, arrays and pointers each get a
// dedicated processor; every other kind falls back to dupeProcessor.
func newTypeProcessor(t reflect.Type) processorFunc {
	switch kind := t.Kind(); kind {
	case reflect.Ptr:
		return newPtrProcessor(t)
	case reflect.Struct:
		return newStructProcessor(t)
	case reflect.Slice:
		return newSliceProcessor(t)
	case reflect.Array:
		return newArrayProcessor(t)
	case reflect.Map:
		return newMapProcessor(t)
	case reflect.Interface:
		return interfaceProcessor
	}
	return dupeProcessor
}
// dupeProcessor is the processorFunc for values that need no special handling
// (typically primitives). It allocates a new settable value of v's type and
// assigns v to it; the OutputProcessor argument is unused.
func dupeProcessor(_ *OutputProcessor, v reflect.Value) reflect.Value {
	copied := reflect.Indirect(reflect.New(v.Type()))
	copied.Set(v)
	return copied
}
// interfaceProcessor is the processorFunc for interface-typed values. It
// processes the value stored in the interface and returns the result wrapped
// in a new, addressable value of the same interface type as v. A nil
// interface yields a nil interface of that type.
//
// Returning the interface type rather than the dynamic type keeps the result
// type equal to the input type. ptrProcessor relies on this: it takes the
// address of the processed element and stores it in a pointer of the original
// type, which would panic for pointer-to-interface types if a value of the
// dynamic type were returned.
func interfaceProcessor(processor *OutputProcessor, v reflect.Value) reflect.Value {
	ret := reflect.New(v.Type()).Elem() // zero value of the interface type: nil
	if v.IsNil() {
		return ret
	}
	// The processed value has the same dynamic type as the original, so it is
	// assignable to the interface type.
	ret.Set(processor.processValue(v.Elem()))
	return ret
}
// structProcessor holds the state for processing a single struct type.
type structProcessor struct {
	// what is the struct type being processed.
	what reflect.Type
	// fields describes each field to copy, including the index path used to
	// locate it on an instance of the struct.
	fields []field
	// fieldEncs holds, at the same position as the matching entry in fields,
	// the processorFunc for that field.
	fieldEncs []processorFunc
}

// process builds a new value of v's type and copies over each field listed in
// se.fields, after running it through its processor. Fields are skipped when
// they cannot be reached in v (for example behind a nil embedded pointer) or
// when they are tagged debug and the processor is not verbose. A destination
// field that cannot be set is logged as a warning and left at its zero value.
func (se *structProcessor) process(processor *OutputProcessor, v reflect.Value) reflect.Value {
	out := reflect.New(v.Type()).Elem()
	for idx := range se.fields {
		fld := &se.fields[idx]
		src := fieldByIndex(v, fld.index)
		if !src.IsValid() {
			// The field lives inside a nil pointer in the source.
			continue
		}
		if fld.zgrabTag.Debug && !processor.Verbose {
			// Debug-only field and verbose output is off: strip it.
			continue
		}
		// Only look up the destination once we know we will write to it,
		// since the lookup allocates any nil pointers along the path.
		dst := writableFieldByIndex(out, fld.index)
		if !dst.CanSet() {
			logrus.Warnf("zgrab output process: Cannot copy over field %s (%v)", fld.name, dst)
			continue
		}
		dst.Set(se.fieldEncs[idx](processor, src))
	}
	return out
}
// newStructProcessor builds the processorFunc for struct type t. It gathers
// the struct's fields via cachedTypeFields and resolves one processor per
// field, based on the field's type.
func newStructProcessor(t reflect.Type) processorFunc {
	fields := cachedTypeFields(t)
	encs := make([]processorFunc, 0, len(fields))
	for _, f := range fields {
		encs = append(encs, typeProcessor(typeByIndex(t, f.index)))
	}
	se := &structProcessor{
		what:      t,
		fields:    fields,
		fieldEncs: encs,
	}
	return se.process
}
// mapProcessor holds the state for processing one map type.
type mapProcessor struct {
	// elemEnc is the processor applied to every map value.
	elemEnc processorFunc
}

// process returns a copy of map v in which every value has been run through
// me.elemEnc; keys are reused as-is. A nil map yields a nil map of the same
// type. Every key is passed through reflectWithString.resolve before any entry
// is copied, and a resolve error is reported via processor.error.
func (me *mapProcessor) process(processor *OutputProcessor, v reflect.Value) reflect.Value {
	if v.IsNil() {
		return reflect.New(v.Type()).Elem() // nil
	}
	// The value returned by MakeMap is not addressable, so allocate an
	// addressable map slot first and store the new map in it.
	out := reflect.New(v.Type()).Elem()
	out.Set(reflect.MakeMap(v.Type()))

	keys := v.MapKeys()
	resolved := make([]reflectWithString, len(keys))
	for i := range keys {
		resolved[i].v = keys[i]
		if err := resolved[i].resolve(); err != nil {
			processor.error(err)
		}
	}
	for i := range resolved {
		key := resolved[i].v
		out.SetMapIndex(key, me.elemEnc(processor, v.MapIndex(key)))
	}
	return out
}
// newMapProcessor builds the processorFunc for map type t. Maps whose element
// type is primitive are handled by dupeProcessor; other maps get a
// mapProcessor that processes each value with the element type's processor.
func newMapProcessor(t reflect.Type) processorFunc {
	elem := t.Elem()
	if isPrimitiveType(elem) {
		return dupeProcessor
	}
	return (&mapProcessor{elemEnc: typeProcessor(elem)}).process
}
// sliceProcessor wraps an array processor and adds nil handling for slices.
type sliceProcessor struct {
	// arrayEnc does the element-by-element work for non-nil slices.
	arrayEnc processorFunc
}

// process returns a nil slice of v's type when v is nil, and otherwise
// delegates to se.arrayEnc.
func (se *sliceProcessor) process(processor *OutputProcessor, v reflect.Value) reflect.Value {
	if !v.IsNil() {
		return se.arrayEnc(processor, v)
	}
	return reflect.New(v.Type()).Elem() // nil
}
// newSliceProcessor builds the processorFunc for slice type t. Slices whose
// element type is primitive are handled by dupeProcessor; other slices get a
// sliceProcessor wrapped around the array processor for t.
func newSliceProcessor(t reflect.Type) processorFunc {
	if !isPrimitiveType(t.Elem()) {
		wrapper := &sliceProcessor{arrayEnc: newArrayProcessor(t)}
		return wrapper.process
	}
	return dupeProcessor
}
// arrayProcessor processes arrays and slices element by element.
type arrayProcessor struct {
	// elemEnc is the processor applied to every element.
	elemEnc processorFunc
}

// process returns a new array or slice of v's type in which each element is
// the result of running ae.elemEnc on the corresponding element of v. For
// slices, the result has the same length and capacity as v.
func (ae *arrayProcessor) process(processor *OutputProcessor, v reflect.Value) reflect.Value {
	count := v.Len()
	// Start from an addressable zero value of the type; for arrays this is
	// already the right size.
	out := reflect.New(v.Type()).Elem()
	if v.Kind() == reflect.Slice {
		// The value from MakeSlice is not addressable, so it is stored into
		// the addressable slot created above.
		out.Set(reflect.MakeSlice(v.Type(), count, v.Cap()))
	}
	for i := 0; i < count; i++ {
		processed := ae.elemEnc(processor, v.Index(i))
		out.Index(i).Set(processed)
	}
	return out
}
// newArrayProcessor builds the processorFunc for array (or slice) type t.
// When the element type is primitive the whole value is handled by
// dupeProcessor; otherwise each element is processed with the element type's
// processor.
func newArrayProcessor(t reflect.Type) processorFunc {
	elem := t.Elem()
	if isPrimitiveType(elem) {
		return dupeProcessor
	}
	return (&arrayProcessor{elemEnc: typeProcessor(elem)}).process
}
// ptrProcessor holds the state for processing a single pointer type.
type ptrProcessor struct {
	// elemEnc is the processor for the pointed-to type.
	elemEnc processorFunc
}

// process returns a new pointer of v's type that points at a processed copy of
// the value v points to. A nil pointer yields a nil pointer of the same type.
func (pe *ptrProcessor) process(processor *OutputProcessor, v reflect.Value) reflect.Value {
	if v.IsNil() {
		return reflect.New(v.Type()).Elem() // nil
	}
	pointee := pe.elemEnc(processor, v.Elem())
	// out is an addressable slot of type *elem; point it at the processed
	// copy.
	out := reflect.New(v.Type()).Elem()
	out.Set(pointee.Addr())
	return out
}
// newPtrProcessor builds the processorFunc for pointer type t, using the
// processor of the pointed-to type for the element.
func newPtrProcessor(t reflect.Type) processorFunc {
	pe := ptrProcessor{elemEnc: typeProcessor(t.Elem())}
	return (&pe).process
}
// isValidJSONNameTag reports whether s is usable as a field name taken from a
// `json` tag. The empty string is rejected. Otherwise every rune must be a
// letter, a digit, or one of a fixed set of punctuation characters (which
// includes the space); backslash and quote characters are not in that set.
func isValidJSONNameTag(s string) bool {
	const allowedPunct = "!#$%&()*+-./:<=>?@[]^_{|}~ "
	if len(s) == 0 {
		return false
	}
	for _, r := range s {
		if strings.ContainsRune(allowedPunct, r) {
			continue
		}
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			continue
		}
		return false
	}
	return true
}
// fieldByIndex walks from v along index, a sequence of field indexes with one
// entry per nesting level, and returns the field reached. Pointers met along
// the way are dereferenced; if one of them is nil, the zero reflect.Value is
// returned.
func fieldByIndex(v reflect.Value, index []int) reflect.Value {
	cur := v
	for k := 0; k < len(index); k++ {
		if cur.Kind() == reflect.Ptr {
			if cur.IsNil() {
				return reflect.Value{}
			}
			cur = cur.Elem()
		}
		cur = cur.Field(index[k])
	}
	return cur
}
// writableFieldByIndex walks from v along index like fieldByIndex, but
// allocates a new value for every nil pointer it meets instead of giving up.
// A struct's fields may really be fields of its anonymous member structs, some
// of which are reached through pointers, so this is needed to obtain a
// destination that can be written. Because it modifies v, it should only be
// called when the field is really going to be written.
func writableFieldByIndex(v reflect.Value, index []int) reflect.Value {
	cur := v
	for k := 0; k < len(index); k++ {
		if cur.Kind() == reflect.Ptr {
			if cur.IsNil() {
				cur.Set(reflect.New(cur.Type().Elem()))
			}
			cur = cur.Elem()
		}
		cur = cur.Field(index[k])
	}
	return cur
}
// typeByIndex is the type-level counterpart of fieldByIndex: starting at t it
// follows index, a sequence of field indexes, dereferencing a pointer type at
// each step where one is met, and returns the type of the field reached.
func typeByIndex(t reflect.Type, index []int) reflect.Type {
	cur := t
	for k := 0; k < len(index); k++ {
		if cur.Kind() == reflect.Ptr {
			cur = cur.Elem()
		}
		cur = cur.Field(index[k]).Type
	}
	return cur
}
// mapKeyKindError is the error returned by reflectWithString.resolve for a map
// key whose kind has no string form.
type mapKeyKindError struct {
	kind reflect.Kind
}

// Error implements the error interface.
func (e *mapKeyKindError) Error() string {
	return "unexpected map key type: " + e.kind.String()
}

// reflectWithString pairs a map key with its string form.
type reflectWithString struct {
	// v is the key itself.
	v reflect.Value
	// s is the string form of v, filled in by resolve.
	s string
}

// resolve computes w.s from w.v. String keys are used verbatim; signed and
// unsigned integer keys (including uintptr) are formatted in base 10. For any
// other kind, w.s is left untouched and a *mapKeyKindError is returned.
//
// Unsupported kinds are reported as an error rather than a panic so that the
// failure reaches the caller through the normal error return of Process.
func (w *reflectWithString) resolve() error {
	switch kind := w.v.Kind(); kind {
	case reflect.String:
		w.s = w.v.String()
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		w.s = strconv.FormatInt(w.v.Int(), 10)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
		w.s = strconv.FormatUint(w.v.Uint(), 10)
	default:
		return &mapKeyKindError{kind: kind}
	}
	return nil
}
// A field represents a single field found in a struct, as collected by
// typeFields.
type field struct {
// name is the output name of the field: the name from the `json` tag when
// that is valid, otherwise the Go field name.
name string
// NOTE(review): nameBytes and equalFold are not populated by typeFields in
// this file -- confirm whether anything else sets or reads them.
nameBytes []byte // []byte(name)
equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
// parent is set by typeFields to the type it was called with.
parent reflect.Type
// tag is true when name came from the `json` tag.
tag bool
// index is the sequence of field indexes leading from the root struct to
// this field, one entry per nesting level.
index []int
// typ is the field's type; typeFields follows one level of unnamed pointer
// before recording it.
typ reflect.Type
// zgrabTag holds the options parsed from the field's `zgrab` tag.
zgrabTag ZGrabTag
}
// byIndex implements sort.Interface over a slice of fields, ordering them by
// index sequence.
type byIndex []field

// Len returns the number of fields in the slice.
func (x byIndex) Len() int {
	return len(x)
}

// Swap exchanges the fields at positions i and j.
func (x byIndex) Swap(i, j int) {
	tmp := x[i]
	x[i] = x[j]
	x[j] = tmp
}

// Less reports whether the field at i sorts before the field at j. Index
// sequences are compared element by element; when one is a prefix of the
// other, the shorter sequence sorts first.
func (x byIndex) Less(i, j int) bool {
	a, b := x[i].index, x[j].index
	for k := 0; k < len(a) && k < len(b); k++ {
		if a[k] != b[k] {
			return a[k] < b[k]
		}
	}
	return len(a) < len(b)
}
// typeFields returns a list of fields that JSON should recognize for the given type.
// The algorithm is breadth-first search over the set of structs to include - the top struct
// and then any reachable anonymous structs.
//
// Unexported fields and fields tagged `json:"-"` are left out. Each returned
// field also carries its parsed `zgrab` tag. When several fields end up with
// the same name, dominantField decides which one (if any) survives. The
// result is sorted by index sequence.
func typeFields(t reflect.Type) []field {
// Anonymous fields to explore at the current level and the next.
current := []field{}
next := []field{{typ: t, parent: t}}
// Count of queued names for current level and the next.
count := map[reflect.Type]int{}
nextCount := map[reflect.Type]int{}
// Types already visited at an earlier level.
visited := map[reflect.Type]bool{}
// Fields found.
var fields []field
for len(next) > 0 {
// Advance one level: what was queued becomes current, and the old
// current slice's storage is reused for the next queue.
current, next = next, current[:0]
count, nextCount = nextCount, map[reflect.Type]int{}
for _, f := range current {
if visited[f.typ] {
continue
}
visited[f.typ] = true
// Scan f.typ for fields to include.
for i := 0; i < f.typ.NumField(); i++ {
sf := f.typ.Field(i)
if sf.Anonymous {
t := sf.Type
if t.Kind() == reflect.Ptr {
t = t.Elem()
}
// If embedded, StructField.PkgPath is not a reliable
// indicator of whether the field is exported.
// See https://golang.org/issue/21122
if !isExported(t.Name()) && t.Kind() != reflect.Struct {
// Ignore embedded fields of unexported non-struct types.
// Do not ignore embedded fields of unexported struct types
// since they may have exported fields.
continue
}
} else if sf.PkgPath != "" {
// Ignore unexported non-embedded fields.
continue
}
tag := sf.Tag.Get("json")
if tag == "-" {
continue
}
// Only the part of the tag before the first comma is the name.
name := strings.SplitN(tag, ",", 2)[0]
if !isValidJSONNameTag(name) {
name = ""
}
// The field's index path is its parent's path plus its own position.
index := make([]int, len(f.index)+1)
copy(index, f.index)
index[len(f.index)] = i
ft := sf.Type
if ft.Name() == "" && ft.Kind() == reflect.Ptr {
// Follow pointer.
ft = ft.Elem()
}
// Record found field and index sequence.
if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
tagged := name != ""
if name == "" {
name = sf.Name
}
// Note: t below is the function parameter (the root type); the t
// declared in the sf.Anonymous branch above is out of scope here.
fields = append(fields, field{
name: name,
tag: tagged,
index: index,
typ: ft,
parent: t,
zgrabTag: *parseZGrabTag(sf.Tag.Get("zgrab")),
})
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
// It only cares about the distinction between 1 or 2,
// so don't bother generating any more copies.
fields = append(fields, fields[len(fields)-1])
}
continue
}
// Record new anonymous struct to explore in next round.
nextCount[ft]++
if nextCount[ft] == 1 {
next = append(next, field{name: ft.Name(), index: index, typ: ft, parent: t})
}
}
}
}
sort.Slice(fields, func(i, j int) bool {
x := fields
// sort field by name, breaking ties with depth, then
// breaking ties with "name came from json tag", then
// breaking ties with index sequence.
if x[i].name != x[j].name {
return x[i].name < x[j].name
}
if len(x[i].index) != len(x[j].index) {
return len(x[i].index) < len(x[j].index)
}
if x[i].tag != x[j].tag {
return x[i].tag
}
return byIndex(x).Less(i, j)
})
// Delete all fields that are hidden by the Go rules for embedded fields,
// except that fields with JSON tags are promoted.
// The fields are sorted in primary order of name, secondary order
// of field index length. Loop over names; for each name, delete
// hidden fields by choosing the one dominant field that survives.
// out shares its backing array with fields; it never grows past the
// position currently being read.
out := fields[:0]
for advance, i := 0, 0; i < len(fields); i += advance {
// One iteration per name.
// Find the sequence of fields with the name of this first field.
fi := fields[i]
name := fi.name
for advance = 1; i+advance < len(fields); advance++ {
fj := fields[i+advance]
if fj.name != name {
break
}
}
if advance == 1 { // Only one field with this name
out = append(out, fi)
continue
}
dominant, ok := dominantField(fields[i : i+advance])
if ok {
out = append(out, dominant)
}
}
fields = out
sort.Sort(byIndex(fields))
return fields
}
// isExported reports whether the identifier id is exported, i.e. whether its
// first rune is an upper-case letter. The empty string is not exported.
func isExported(id string) bool {
	first, _ := utf8.DecodeRuneInString(id)
	return unicode.IsUpper(first)
}
// dominantField picks, from a non-empty run of fields that all share one name,
// the single field that wins under Go's embedding rules as modified by JSON
// tags. The fields must be sorted by increasing index length.
//
// Only the fields at the shallowest depth are considered. Among those, a
// single tagged field wins; with no tagged field, a lone field wins. Two or
// more tagged fields, or two or more untagged fields with no tagged one, are a
// conflict: the zero field and false are returned, and callers skip the name
// entirely.
func dominantField(fields []field) (field, bool) {
	// The winner must have the shortest index length, which is that of the
	// first entry; everything longer is cut off.
	shortest := len(fields[0].index)
	taggedAt := -1 // position of the first tagged field, if any
	for pos := range fields {
		cand := fields[pos]
		if len(cand.index) > shortest {
			fields = fields[:pos]
			break
		}
		if !cand.tag {
			continue
		}
		if taggedAt >= 0 {
			// A second tagged field at the same depth: conflict.
			return field{}, false
		}
		taggedAt = pos
	}
	switch {
	case taggedAt >= 0:
		return fields[taggedAt], true
	case len(fields) > 1:
		// Several untagged fields with this name at the same depth.
		return field{}, false
	}
	return fields[0], true
}
// fieldCache holds the per-type field lists computed by typeFields. It is read
// and updated by cachedTypeFields: the map stored in value is replaced as a
// whole on every update, and mu is taken only while doing so.
var fieldCache struct {
value atomic.Value // map[reflect.Type][]field
mu sync.Mutex // used only by writers
}
// cachedTypeFields returns the same result as typeFields(t), but remembers it
// in fieldCache so that repeated calls for one type do not recompute it. The
// returned slice is never nil.
func cachedTypeFields(t reflect.Type) []field {
	snapshot, _ := fieldCache.value.Load().(map[reflect.Type][]field)
	if cached := snapshot[t]; cached != nil {
		return cached
	}

	// Compute the fields without holding the lock. This may duplicate effort
	// across callers but does not hold other computations back.
	computed := typeFields(t)
	if computed == nil {
		// Store a non-nil empty slice so that the nil check above counts as a
		// hit next time.
		computed = []field{}
	}

	fieldCache.mu.Lock()
	defer fieldCache.mu.Unlock()

	// Re-read the map under the lock, then publish a copy with t added.
	snapshot, _ = fieldCache.value.Load().(map[reflect.Type][]field)
	grown := make(map[reflect.Type][]field, len(snapshot)+1)
	for typ, fs := range snapshot {
		grown[typ] = fs
	}
	grown[t] = computed
	fieldCache.value.Store(grown)
	return computed
}