mirror of
https://github.com/hackerschoice/segfault.git
synced 2024-06-16 11:58:43 +00:00
329 lines
13 KiB
Bash
Executable File
329 lines
13 KiB
Bash
Executable File
#! /usr/bin/env bash

# Change to CWD by path (in case CWD has been updated).
cd "$(pwd)" || exit

# Absolute directory this script lives in; funcs.sh is loaded from there.
BINDIR="$(cd "$(dirname "${0}")" || exit; pwd)"
if ! source "${BINDIR}/funcs.sh"; then
	exit 254
fi

# jq is needed further down to parse the output of 'docker info'.
if ! command -v jq >/dev/null; then
	ERREXIT 255 "Command 'jq' not found. Try ${CDC}apt-get install jq${CN}"
fi
|
|
|
|
# Tear the deployment down.
#
# Prunes stopped containers, stops every container whose name starts with
# "lg", runs docker-compose with the given arguments, prunes unused networks
# and finally deletes left-over "br-*" bridge interfaces.
#
# Arguments: "$@" - handed verbatim to docker-compose (callers pass "down" ...).
down()
{
	local ids
	local dev

	docker container prune -f

	# All containers (running or not) whose name starts with "lg".
	mapfile -t ids < <(docker ps -f name=^lg --all --quiet)
	[[ ${#ids[@]} -gt 0 ]] && docker stop "${ids[@]}"

	docker-compose "$@"
	docker network prune -f

	# Sometimes docker gets into a state where it complains about an overlapping
	# network pool even though 'docker network ls' shows no networks besides
	# the 3 default networks and with no containers running:
	# => delete every remaining br-* interface by hand.
	ip link show | cut -f2 -d" " | grep -E "^(br-)" | while read -r dev; do
		# "br-xxxx@peer:" / "br-xxxx:" => "br-xxxx"
		dev="${dev%@*}"
		dev="${dev%:*}"
		[[ -z $dev ]] && continue
		ip link delete "${dev}" down
	done
}
|
|
|
|
# Derive the Redis password from SF_SEED unless the caller supplied one:
# the first 32 alphanumeric characters of base64(output of sha512sum).
# NOTE(review): SF_SEED is only validated further down (on the "up" path);
# with an empty SF_SEED the derived value is a fixed string - confirm this
# is acceptable for the non-"up" commands.
if [[ -z $SF_REDIS_AUTH ]]; then
	SF_REDIS_AUTH=$(printf '%s' "Redis AUTH $SF_SEED" | sha512sum | base64 -w0)
	SF_REDIS_AUTH="${SF_REDIS_AUTH//[^[:alnum:]]}"
	SF_REDIS_AUTH="${SF_REDIS_AUTH:0:32}"
	export SF_REDIS_AUTH
fi

# Second field of the first DriverStatus entry as reported by 'docker info'.
SF_BACKING_FS="$(docker info --format '{{json .DriverStatus}}' | jq -r '.[0][1]')"
export SF_BACKING_FS
if [[ "$SF_BACKING_FS" != "xfs" ]]; then
	WARN "Backing FS is not XFS (SF_USER_ROOT_FS_SIZE wont work)"
fi

# "down" is handled by down(), "up" continues below and every other
# sub-command is handed straight to docker-compose.
case "$1" in
	down)
		down "$@"
		exit
		;;
	up)
		;;
	*)
		exec docker-compose "$@"
		;;
esac

# HERE: "up"

if [[ -z $SF_SEED ]]; then
	ERREXIT 255 "SF_SEED= not set"
fi
|
|
|
|
# Load variables from ENV but only those not already set in
|
|
# user's environemtn.
|
|
load_env()
|
|
{
|
|
local n
|
|
local v
|
|
local arr
|
|
local a
|
|
envfile="./.env"
|
|
|
|
[[ -n $SF_BASEDIR ]] && envfile="${SF_BASEDIR}/.env"
|
|
if [[ ! -f "${envfile}" ]]; then
|
|
WARN "Not found: \${SF_BASEDIR}/.env (${envfile})"
|
|
else
|
|
mapfile -t arr < <(grep -E -v '(^#|^$)' "${envfile}")
|
|
for a in "${arr[@]}"; do
|
|
|
|
n="${a%%=*}"
|
|
v="${a#*=}"
|
|
# Prefer user's environemtn over .env settings.
|
|
[[ -z "$(eval echo \$$n)" ]] && eval "${n}=\"${v}\""
|
|
done
|
|
fi
|
|
|
|
[[ -z $SF_BASEDIR ]] && ERREXIT 255 "SF_BASEDIR= not set in ${envfile}."
|
|
}
|
|
|
|
# Prepare block-IO weighting for the containers:
#   1. make sure the BFQ I/O scheduler is available (loading the module if
#      the first scheduler file does not list it),
#   2. select BFQ on the block devices,
#   3. check that docker accepts --blkio-weight.
# Emits a WARN and returns early as soon as one of the steps fails.
blockio_init()
{
	local is_bfq
	local fn

	# Check if there is BFQ-Scheduler support in the Kernel.
	# Only the first scheduler file found is inspected.
	for fn in /sys/class/block/*/queue/scheduler; do
		[[ -f "${fn}" ]] && grep -q bfq "${fn}" && is_bfq=1
		break
	done

	[[ -z $is_bfq ]] && {
		# HERE: no BFQ support. Try load module.
		# Try: apt install linux-modules-extra-aws
		modprobe bfq || { WARN "No BFQ-Scheduler. Attacker can DoS block-IO."; return; }
		is_bfq=1
	}

	# Select BFQ on every block device. Return at the first device that
	# refuses it.
	for fn in /sys/class/block/*/queue/scheduler; do
		[[ ! -f "${fn}" ]] && break
		echo bfq >"${fn}" || { WARN "${fn%/queue*}: Failed to set BFQ scheduler."; return; }
	done

	# Odd bug. On some systems we set all correctly and docker still complains that
	# it cant use Block IO weights. It appears to be a problem with cgroup v1?
	# It can be fixed on v1 systems by using --cgroup-parent=/guest and creating:
	#    mkdir -p /sys/fs/cgroup/blkio/guest
	#    echo 1 >/sys/fs/cgroup/blkio/guest/blkio.bfq.weight
	# => But then why cant docker fix this crap?
	# https://github.com/moby/moby/issues/16173#issuecomment-1298432655
	# Test if docker accepts --blkio-weight:
	docker run --rm --blkio-weight=100 alpine true 2>&1 | grep "does not support Block" >/dev/null && { WARN "DOCKER: Your kernel does not support Block I/O weight."; return; }
}
|
|
|
|
# Raise a sysctl value: set key $1 to $2 unless the current value is
# already greater than or equal to $2. WARNs if the value cannot be set.
sysinc()
{
	local name=$1
	local want=$2
	local have

	have=$(sysctl -n "$name")
	if [[ $have -ge $want ]]; then
		return
	fi
	sysctl -q -w "${name}=${want}" || WARN "Could not set '${name}=${want}'"
}
|
|
|
|
# Lower a sysctl value: set key $1 to $2 unless the current value is
# already less than or equal to $2. WARNs if the value cannot be set.
#
# Arguments: $1 - sysctl key
#            $2 - upper bound to enforce
sysdec()
{
	local key
	local val
	local cur

	key=$1
	val=$2
	cur=$(sysctl -n "$key")
	# An unreadable key yields an empty string which -le would evaluate as 0,
	# i.e. "already lower". Only skip the write when a number was really read;
	# otherwise try to set the value so that a failure gets reported.
	[[ $cur =~ ^-?[0-9]+$ ]] && [[ $cur -le $val ]] && return
	sysctl -q -w "${key}=${val}" || WARN "Could not set '${key}=${val}'"
}
|
|
|
|
# Emit a warning when $1 is not an existing regular file.
warn_file()
{
	if [[ ! -f "$1" ]]; then
		WARN "Not found: $1"
	fi
}
|
|
|
|
# Warn when the installed copy of a file looks outdated.
#
# $1 is a path relative to both ${SF_BASEDIR} (installed copy) and
# ${BINDIR}/.. (shipped copy). A warning is emitted when the installed copy
# is missing, or when it is older than the shipped copy AND differs in size.
# Exits via ERREXIT when the shipped copy does not exist.
warn_outdated()
{
	local installed="${SF_BASEDIR}/${1}"
	local shipped="${BINDIR}/../${1}"

	if [[ ! -f "$installed" ]]; then
		WARN "Not found: $installed"
		return
	fi
	if [[ ! -f "$shipped" ]]; then
		ERREXIT 255 "Not found: $shipped"
	fi

	# Installed file is newer than (or as old as) the shipped one.
	if [[ ! "$installed" -ot "$shipped" ]]; then
		return
	fi

	# Older but of identical size: treated as unchanged.
	if [[ $(stat -c%s "$installed") -eq $(stat -c%s "$shipped") ]]; then
		return
	fi

	WARN "$installed is outdated? Try ${CDC}touch $installed${CN} to ignore."
}
|
|
|
|
# Pull in the .env settings and fill in defaults for everything still unset.
load_env
: "${SF_DATADIR:=${SF_BASEDIR}/data}"
: "${SF_SHMDIR:=/dev/shm/sf}"
: "${SF_HOST_MTU:=1500}"
SF_GUEST_MTU=$((SF_HOST_MTU - 80))
export SF_GUEST_MTU

[[ -d "${SF_DATADIR}/user" ]] || mkdir -p "${SF_DATADIR}/user"
[[ -d "${SF_DATADIR}/share" ]] || mkdir -p "${SF_DATADIR}/share"

# GeoIP database (the check can be silenced with MAXMIND_KEY=skip).
if [[ ! -f "${SF_DATADIR}/share/GeoLite2-City.mmdb" && "${MAXMIND_KEY,,}" != "skip" ]]; then
	WARN "Not found: data/share/GeoLite2-City.mmdb"
	echo -e "Try \`curl 'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City&license_key=${MAXMIND_KEY:-KEY-NOT-SET}&suffix=tar.gz' | tar xfvz  - --strip-components=1  --no-anchored -C '${SF_DATADIR}/share/' 'GeoLite2-City.mmdb'\`."
	echo -e "Try ${CDC}MAXMIND_KEY=skip${CN} to disable. This will also disable limits by GEOIP and disable user tools like geoip and geoiphn."
fi

# List of TOR exit nodes.
if [[ ! -f "${SF_DATADIR}/share/tor-exit-nodes.txt" ]]; then
	WARN "Not found: data/share/tor-exit-nodes.txt"
	echo -e "Try \`curl 'https://www.dan.me.uk/torlist/?exit' >'${SF_DATADIR}/share/tor-exit-nodes.txt'\`"
fi

if [[ ! -f "${SF_DATADIR}/share/relay-exit-nodes-mullvad.txt" ]]; then
	WARN "Not found: data/share/relay-exit-nodes-mullvad.txt - ${CDM}See contrib/cronjob how to create it.${CN}"
fi

if [[ ! -f "${SF_DATADIR}/share/proxies.txt" ]]; then
	WARN "Not found: data/share/proxies.txt (Mullvad proxies) - ${CDM}See contrib/cronjob how to create it.${CN}"
fi

if [[ ! -f "${SF_BASEDIR}/config/etc/relay-exit-nodes-global.txt" ]]; then
	WARN "Not found: ${SF_BASEDIR}/config/etc/relay-exit-nodes-global.txt"
	echo -e "\
==> Log in from global relays is not controlled. We use a private list from Blind Mouse.
==> Generate your own list (see THC's Tips & Tricks).
==> Use ${CDC}touch ${SF_BASEDIR}/config/etc/relay-exit-nodes-global.txt${CN} to stop this warning."
fi
# Best effort: the file may not exist.
chmod 644 "${SF_BASEDIR}/config/etc/relay-exit-nodes-global.txt" 2>/dev/null

# Use the overlay2 directory below SF_BASEDIR when it exists and nothing
# else was configured.
if [[ -z $SF_OVERLAYDIR && -d "${SF_BASEDIR}/docker/overlay2" ]]; then
	export SF_OVERLAYDIR="${SF_BASEDIR}/docker/overlay2"
fi

# Without SF_IP: resolve SF_FQDN (answers ending in '.' are dropped).
if [[ -z $SF_IP ]]; then
	command -v dig >/dev/null || ERREXIT 255 "Command 'dig' not found. Try ${CDC}apt-get install dnsutils${CN}"
	SF_IP=$(dig +short "$SF_FQDN" 2>/dev/null | grep -v '\.$')
	export SF_IP
	[[ -z $SF_IP ]] && ERREXIT 255 "Could not get SF_IP..."
	WARN "SF_IP not set in .env. Using '$SF_IP'."
fi
|
|
|
|
# xfs_init_quota "${SF_DATADIR}/everyone-root" "everyone" 100 16384 16G

# Enable BFQ on all block devices to allow cgroup's io.weight
# FIXME: One day put this into udev/startup scripts and only for
# the devices that we are using...
blockio_init

# BUG-ARP-CACHE:
# User can cause arp-table overflow. The kernel limit is global for all arp tables
# but each container gets its own arp table. All containers just put pressure on the global
# limit.
# Attack: A user can spawn multiple containers and create 'incomplete' arp entries in its own
# table. Those entries reduce the amount of entries available for other containers (it's a global limit
# and not a limit per container).
#
# Oddity: Docker-compose is making the host name of each service available (e.g sf-redis, sf-tor etc).
# This is not done via an /etc/hosts entry but handled by Docker internally. The problem is that
# 'somewhere' docker (internally) needs an arp-entry (which fails during an attack). Then the
# name (e.g. sf-redis or so) can not be resolved and all goes to shits.
#
# Tweaking base_reachable_time_ms and gc_stale_time has no effect. Best we can do:
# 1. Use static IPs where possible for inter-container communication.
# 2. Limit the User's local network (to /22 or /24)
# 3. Increase the global size of the kernel's arp table (gc_thresh3)

# These are global and shared among all containers.
# Increase unless already higher ("key value" pairs, applied in order).
# inotify users can be listed with:
# find /proc/*/fd -lname anon_inode:inotify | cut -d/ -f3 | xargs -I '{}' -- ps --no-headers -o '%p %U %c' -p '{}' | uniq -c | sort -nr
_sf_sysctl_min=(
	"net.ipv4.neigh.default.gc_thresh3 65536"
	"net.netfilter.nf_conntrack_buckets 16384"  # default is 65536 for >4GB systems
	"net.netfilter.nf_conntrack_max 1048576"
	"fs.inotify.max_user_instances 1024"
)
for _sf_kv in "${_sf_sysctl_min[@]}"; do
	sysinc "${_sf_kv% *}" "${_sf_kv#* }"
done

# Conntrack & Namespaces is a mess. Restricting these inside a container
# only results that the connection is dropped sooner but the state still
# remains on the host's container. Thus we also reduce the host's timers
# to deal with this. The host does not do CONNTRACKING and thus these
# settings should only affect the containers.
# Decrease unless already lower ("key value" pairs, applied in order).
_sf_sysctl_max=(
	"net.netfilter.nf_conntrack_tcp_timeout_syn_sent 10"
	"net.netfilter.nf_conntrack_tcp_timeout_syn_recv 5"         # default is 30, 5 because of reverse tunnels
	"net.netfilter.nf_conntrack_tcp_timeout_last_ack 5"         # default is 30
	"net.netfilter.nf_conntrack_tcp_timeout_fin_wait 10"        # default is 120
	"net.netfilter.nf_conntrack_tcp_timeout_close 1"            # default is 10
	"net.netfilter.nf_conntrack_tcp_timeout_close_wait 10"      # default is 60
	"net.netfilter.nf_conntrack_tcp_timeout_unacknowledged 30"  # default is 300
	"net.netfilter.nf_conntrack_tcp_timeout_established 10800"  # 3h, default is 5 days
	"net.netfilter.nf_conntrack_icmp_timeout 10"                # default is 30
	"net.netfilter.nf_conntrack_udp_timeout 10"                 # default is 30
)
for _sf_kv in "${_sf_sysctl_max[@]}"; do
	sysdec "${_sf_kv% *}" "${_sf_kv#* }"
done
unset _sf_sysctl_min _sf_sysctl_max _sf_kv

# Each Hugepagesize is 2MB (grep HUGE /proc/meminfo)
# 512 => 1g as HUGE
# 8192 => 16g as HUGE
if [[ ! $(cat /proc/sys/vm/nr_hugepages) -gt 0 ]]; then
	WARN "Huge Tables not set. Consider ${CDC}echo \"vm.nr_hugepages=8192\" >>/etc/sysctl.conf && sysctl -w vm.nr_hugepages=8192${CN}"
fi
|
|
|
|
# Warn for outdated files in /sf/config/* (that are older and different size).
# The list of files is taken from the config/ directory next to ${BINDIR}.
mapfile -t arr < <(cd "${BINDIR}/../" || exit; find config -type f)
for fn in "${arr[@]}"; do
	warn_outdated "$fn"
done

# Check if there are any files in /sf/sfbin that are not equal to ./sfbin
# (compared by md5 checksum; a missing installed file counts as different).
for x in "${BINDIR}/"*; do
	[[ -e "$x" ]] || WARN "Oops. Files missing in ${BINDIR}/*???"
	src=$(md5sum "$x")
	src=${src%% *}
	# Keep only the file name.
	x=${x##*/}
	dst=$(md5sum "${SF_BASEDIR}/sfbin/${x}" 2>/dev/null)
	dst=${dst%% *}
	if [[ $src != "$dst" ]]; then
		WARN "${SF_BASEDIR}/sfbin/${x} is outdated. Please update with ${CDC}${BINDIR}/${x}${CN}"
	fi
done
|
|
|
|
|
|
# Make sure /dev/shm is 'shared'
if [[ "$(findmnt -no TARGET,PROPAGATION /dev/shm)" != *"shared"* ]]; then
	mount --make-shared /dev/shm/ || ERREXIT 252
fi

# Start both systemd slices and verify that their status output mentions
# "Segfault".
systemctl start sf.slice || WARN 'Could not start sf.slice'
systemctl start sf-guest.slice || WARN 'Could not start sf-guest.slice'
systemctl status sf.slice | grep Segfault >/dev/null || WARN 'Bad start sf.slice. Does not belong to Segfault.'
systemctl status sf-guest.slice | grep Segfault >/dev/null || WARN 'Bad start sf-guest.slice. Does not belong to Segfault.'

SF_CG_DIR="/sys/fs/cgroup"
if [[ -d "/sys/fs/cgroup/unified" ]]; then
	SF_CG_DIR="/sys/fs/cgroup/unified"
	# for cgroupv1 docker-run expects the absolute hierarchy path (for --cgroup-parent):
	export SF_CG_PARENT="sf.slice/sf-guest.slice"
fi

# The cgroup2 mount entry for SF_CG_DIR (empty when there is none).
str=$(mount | grep ^cgroup2 | grep -F "$SF_CG_DIR" )
if [[ $str == *'nsdelegate'* ]]; then
	# HERE: cgroup2 is in use.
	echo -e >&2 "[$(date '+%F %T' -u)] [${CDY}WARN${CN}] ${SF_CG_DIR} is mounted with nsdelegate. Disabling nsdelegate."
	# Keep only the mount options between the parentheses.
	str=${str##*\(}
	str=${str%\)*}
	# We need to move encfsd to the user's cgroup: From sf.slice (sf-encfsd) to sf.slice/sf-guest.slice.
	# We need to turn off "nsdelegate" as otherwise there is no (?) way of moving it.
	# (write() to cgroup.procs returns ENOENT if nsdelegate is enabled.)

	# There is no 'nonsdelegate' and removing nsdelegate requires a hack:
	#     mount -t cgroup2 none /mnt && umount /mnt
	#     mount -o remount,rw,nosuid,nodev,noexec,relatime,memory_recursiveprot /sys/fs/cgroup
	# Test with:
	#     docker run --rm -v /sys/fs/cgroup:/sys/fs/cgroup -it ubuntu bash -c 'sleep 31339 & echo $! >/sys/fs/cgroup/sf.slice/sf-guest.slice/docker-ANY-RUNNING-CONTAINER-ID-HERE.scope/cgroup.procs && echo $! OK'
	mount -t cgroup2 none /mnt
	umount /mnt
	# Drop "nsdelegate" wherever it sits in the option list.
	str="${str/,nsdelegate/}"
	str="${str/nsdelegate,/}"
	mount -o "remount,${str}" "${SF_CG_DIR}" || ERREXIT 255
fi

# sf.slice's parent is root (/). Any siblings (e.g. /user.slice, /system.slice) also need to do
# IO Accounting or otherwise /sf.slice can starve those.
if ! systemctl status system.slice | head -n8 | grep -F "IO: " &>/dev/null; then
	WARN "IO Accounting not enabled. Check /etc/systemd/system.conf"
fi
if grep -F sf.slice /etc/docker/daemon.json &>/dev/null; then
	WARN "Obsolete sf.slice found in /etc/docker/daemon.json. Remove that line."
fi

if [[ ! -d /var/lib/lxcfs/proc ]]; then
	WARN "LG will report wrong uptime etc. Try ${CDC}apt-get install lxcfs${CN}?"
fi
|
|
# If there was a warning then wait...
WARN_ENTER

# Delete stale run files and (re)create the directory for the redis socket.
[[ -d "${SF_SHMDIR}/run/encfsd/user" ]] && rm -rf "${SF_SHMDIR}/run/encfsd/user"
[[ ! -d "${SF_SHMDIR}/run/redis/sock" ]] && mkdir -p "${SF_SHMDIR}/run/redis/sock"
chmod 700 "${SF_SHMDIR}/run/redis"
chown 999 "${SF_SHMDIR}/run/redis/sock" # docker/redis user
chmod 711 "${SF_SHMDIR}/run/redis/sock"

# exec docker-compose "$@"
docker-compose "$@"
ret=$?

# Was the stack started in the background? Both spellings of the option
# have to be recognised: matching the substring " -d" alone misses
# "--detach" and would tear down a stack that was just started detached.
is_detached=
for arg in "$@"; do
	case "$arg" in
		-d | --detach | --detach=true) is_detached=1 ;;
	esac
done

# If not started as background (-d/--detach): run DOWN.
[[ -z $is_detached ]] && { down "down"; exit; }
echo -e "May need to run \`${CDC}$0 down${CN}\` (code=$ret)"
|