mirror of https://github.com/vimagick/dockerfiles synced 2024-06-20 13:58:42 +00:00

add kafka-arm

This commit is contained in:
kev 2018-07-28 02:55:18 +08:00
parent b493677b34
commit 8f8dd2db4c
10 changed files with 1319 additions and 0 deletions

@@ -46,6 +46,7 @@ A collection of delicious docker recipes.
## Big Data
- [x] kafka-arm
- [x] kafka-manager
- [x] zookeeper-arm

40
kafka/arm/Dockerfile Normal file

@@ -0,0 +1,40 @@
#
# Dockerfile for kafka-arm
#

FROM easypi/alpine-arm
MAINTAINER EasyPi Software Foundation

ARG kafka_version=1.1.1
ARG scala_version=2.12
ARG glibc_version=2.27-r0

ENV KAFKA_VERSION=$kafka_version \
    SCALA_VERSION=$scala_version \
    KAFKA_HOME=/opt/kafka \
    GLIBC_VERSION=$glibc_version

ENV PATH=${PATH}:${KAFKA_HOME}/bin

COPY download-kafka.sh start-kafka.sh broker-list.sh create-topics.sh versions.sh /tmp/

RUN apk add --no-cache bash curl jq docker openjdk8-jre \
 && mkdir /opt \
 && chmod a+x /tmp/*.sh \
 && mv /tmp/start-kafka.sh /tmp/broker-list.sh /tmp/create-topics.sh /tmp/versions.sh /usr/bin \
 && sync && /tmp/download-kafka.sh \
 && tar xfz /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz -C /opt \
 && rm /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \
 && ln -s /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} /opt/kafka \
 && rm /tmp/* \
 && wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-${GLIBC_VERSION}.apk \
 && apk add --no-cache --allow-untrusted glibc-${GLIBC_VERSION}.apk \
 && rm glibc-${GLIBC_VERSION}.apk

COPY overrides /opt/overrides

VOLUME ["/kafka"]
EXPOSE 9092

CMD ["start-kafka.sh"]
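
A quick way to exercise this image; the host IP and ZooKeeper address below are placeholders for your own setup, not part of the commit:

    docker build -t easypi/kafka-arm kafka/arm
    docker run -d -p 9092:9092 \
        -e KAFKA_ADVERTISED_HOST_NAME=192.168.1.10 \
        -e KAFKA_ADVERTISED_PORT=9092 \
        -e KAFKA_ZOOKEEPER_CONNECT=192.168.1.10:2181 \
        easypi/kafka-arm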

5
kafka/arm/broker-list.sh Executable file

@@ -0,0 +1,5 @@
#!/bin/bash
CONTAINERS=$(docker ps | grep 9092 | awk '{print $1}')
BROKERS=$(for CONTAINER in ${CONTAINERS}; do docker port "$CONTAINER" 9092 | sed -e "s/0.0.0.0:/$HOST_IP:/g"; done)
echo "${BROKERS/$'\n'/,}"

57
kafka/arm/create-topics.sh Executable file

@@ -0,0 +1,57 @@
#!/bin/bash

if [[ -z "$KAFKA_CREATE_TOPICS" ]]; then
    exit 0
fi

if [[ -z "$START_TIMEOUT" ]]; then
    START_TIMEOUT=600
fi

start_timeout_exceeded=false
count=0
step=10
while netstat -lnt | awk '$4 ~ /:'"$KAFKA_PORT"'$/ {exit 1}'; do
    echo "waiting for kafka to be ready"
    sleep $step;
    count=$((count + step))
    if [ $count -gt $START_TIMEOUT ]; then
        start_timeout_exceeded=true
        break
    fi
done

if $start_timeout_exceeded; then
    echo "Not able to auto-create topic (waited for $START_TIMEOUT sec)"
    exit 1
fi

# --if-not-exists was introduced in 0.10. In earlier versions, topic creation
# will fail if the topic already exists.
# shellcheck disable=SC1091
source "/usr/bin/versions.sh"
if [[ "$MAJOR_VERSION" == "0" && "$MINOR_VERSION" -gt "9" ]] || [[ "$MAJOR_VERSION" -gt "0" ]]; then
    KAFKA_0_10_OPTS="--if-not-exists"
fi

# Expected format:
#   name:partitions:replicas:cleanup.policy
IFS="${KAFKA_CREATE_TOPICS_SEPARATOR-,}"; for topicToCreate in $KAFKA_CREATE_TOPICS; do
    echo "creating topics: $topicToCreate"
    IFS=':' read -r -a topicConfig <<< "$topicToCreate"
    config=
    if [ -n "${topicConfig[3]}" ]; then
        config="--config=cleanup.policy=${topicConfig[3]}"
    fi
    COMMAND="JMX_PORT='' ${KAFKA_HOME}/bin/kafka-topics.sh \\
        --create \\
        --zookeeper ${KAFKA_ZOOKEEPER_CONNECT} \\
        --topic ${topicConfig[0]} \\
        --partitions ${topicConfig[1]} \\
        --replication-factor ${topicConfig[2]} \\
        ${config} \\
        ${KAFKA_0_10_OPTS} &"
    eval "${COMMAND}"
done

wait
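
KAFKA_CREATE_TOPICS follows the name:partitions:replicas[:cleanup.policy] format noted in the comment above; a hypothetical value:

    KAFKA_CREATE_TOPICS="events:4:1,audit:1:1:compact"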

24
kafka/arm/docker-compose.yml Normal file

@@ -0,0 +1,24 @@
zookeeper:
  image: easypi/zookeeper-arm
  ports:
    - "2181:2181"
  volumes:
    - ./data/zookeeper/data:/data
    - ./data/zookeeper/datalog:/datalog
  restart: always

kafka:
  image: easypi/kafka-arm
  ports:
    - "9092:9092"
  hostname: kafka
  volumes:
    - ./data/kafka:/kafka
    - /var/run/docker.sock:/var/run/docker.sock
  environment:
    - KAFKA_ADVERTISED_HOST_NAME=kafka
    - KAFKA_ADVERTISED_PORT=9092
    - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
  links:
    - zookeeper
  restart: always
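
A possible smoke test for this compose file; the topic name and message are made up, and with Kafka's default auto topic creation the producer call creates the topic:

    docker-compose up -d
    docker-compose exec kafka bash -c 'echo hello | kafka-console-producer.sh --broker-list kafka:9092 --topic test'
    docker-compose exec kafka kafka-console-consumer.sh --bootstrap-server kafka:9092 --topic test --from-beginning --max-messages 1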

23
kafka/arm/download-kafka.sh Executable file

@@ -0,0 +1,23 @@
#!/bin/sh -e

# shellcheck disable=SC1091
source "/usr/bin/versions.sh"

FILENAME="kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz"

## Versions prior to 0.10.2.1 are not actively mirrored
echo "Downloading kafka $MAJOR_VERSION.$MINOR_VERSION"
if [[ "$MAJOR_VERSION" == "0" && "$MINOR_VERSION" -lt "11" ]]; then
    echo "Version prior to 0.10.2.1 - downloading direct"
    url="https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/${FILENAME}"
else
    url=$(curl --stderr /dev/null "https://www.apache.org/dyn/closer.cgi?path=/kafka/${KAFKA_VERSION}/${FILENAME}&as_json=1" | jq -r '"\(.preferred)\(.path_info)"')
fi

if [[ -z "$url" ]]; then
    echo "Unable to determine mirror for downloading Kafka, the service may be down"
    exit 1
fi

echo "Downloading Kafka from $url"
wget "${url}" -O "/tmp/${FILENAME}"
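
For current releases the download URL is assembled from the closer.cgi JSON (preferred mirror plus path), so it ends up looking roughly like the line below; the mirror host varies:

    https://dlcdn.apache.org/kafka/1.1.1/kafka_2.12-1.1.1.tgz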

1011
kafka/arm/overrides/0.9.0.1.sh Executable file

File diff suppressed because it is too large

2
kafka/arm/start-kafka-shell.sh Executable file

@@ -0,0 +1,2 @@
#!/bin/bash
docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -e HOST_IP=$1 -e ZK=$2 -i -t wurstmeister/kafka /bin/bash
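
Intended invocation, with the Docker host IP and ZooKeeper address passed as the two positional arguments (example values):

    ./start-kafka-shell.sh 192.168.1.10 192.168.1.10:2181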

149
kafka/arm/start-kafka.sh Executable file

@@ -0,0 +1,149 @@
#!/bin/bash -e

# Allow specific kafka versions to perform any unique bootstrap operations
OVERRIDE_FILE="/opt/overrides/${KAFKA_VERSION}.sh"
if [[ -x "$OVERRIDE_FILE" ]]; then
    echo "Executing override file $OVERRIDE_FILE"
    eval "$OVERRIDE_FILE"
fi

# Store original IFS config, so we can restore it at various stages
ORIG_IFS=$IFS

if [[ -z "$KAFKA_ZOOKEEPER_CONNECT" ]]; then
    echo "ERROR: missing mandatory config: KAFKA_ZOOKEEPER_CONNECT"
    exit 1
fi

if [[ -z "$KAFKA_PORT" ]]; then
    export KAFKA_PORT=9092
fi

create-topics.sh &
unset KAFKA_CREATE_TOPICS

if [[ -z "$KAFKA_ADVERTISED_PORT" && \
  -z "$KAFKA_LISTENERS" && \
  -z "$KAFKA_ADVERTISED_LISTENERS" && \
  -S /var/run/docker.sock ]]; then
    KAFKA_ADVERTISED_PORT=$(docker port "$(hostname)" $KAFKA_PORT | sed -r 's/.*:(.*)/\1/g')
    export KAFKA_ADVERTISED_PORT
fi

if [[ -z "$KAFKA_BROKER_ID" ]]; then
    if [[ -n "$BROKER_ID_COMMAND" ]]; then
        KAFKA_BROKER_ID=$(eval "$BROKER_ID_COMMAND")
        export KAFKA_BROKER_ID
    else
        # By default auto allocate broker ID
        export KAFKA_BROKER_ID=-1
    fi
fi

if [[ -z "$KAFKA_LOG_DIRS" ]]; then
    export KAFKA_LOG_DIRS="/kafka/kafka-logs-$HOSTNAME"
fi

if [[ -n "$KAFKA_HEAP_OPTS" ]]; then
    sed -r -i 's/(export KAFKA_HEAP_OPTS)="(.*)"/\1="'"$KAFKA_HEAP_OPTS"'"/g' "$KAFKA_HOME/bin/kafka-server-start.sh"
    unset KAFKA_HEAP_OPTS
fi

if [[ -n "$HOSTNAME_COMMAND" ]]; then
    HOSTNAME_VALUE=$(eval "$HOSTNAME_COMMAND")

    # Replace any occurrences of _{HOSTNAME_COMMAND} with the value
    IFS=$'\n'
    for VAR in $(env); do
        if [[ $VAR =~ ^KAFKA_ && "$VAR" =~ "_{HOSTNAME_COMMAND}" ]]; then
            eval "export ${VAR//_\{HOSTNAME_COMMAND\}/$HOSTNAME_VALUE}"
        fi
    done
    IFS=$ORIG_IFS
fi

if [[ -n "$PORT_COMMAND" ]]; then
    PORT_VALUE=$(eval "$PORT_COMMAND")

    # Replace any occurrences of _{PORT_COMMAND} with the value
    IFS=$'\n'
    for VAR in $(env); do
        if [[ $VAR =~ ^KAFKA_ && "$VAR" =~ "_{PORT_COMMAND}" ]]; then
            eval "export ${VAR//_\{PORT_COMMAND\}/$PORT_VALUE}"
        fi
    done
    IFS=$ORIG_IFS
fi

if [[ -n "$RACK_COMMAND" && -z "$KAFKA_BROKER_RACK" ]]; then
    KAFKA_BROKER_RACK=$(eval "$RACK_COMMAND")
    export KAFKA_BROKER_RACK
fi

# Try and configure minimal settings or exit with error if there isn't enough information
if [[ -z "$KAFKA_ADVERTISED_HOST_NAME$KAFKA_LISTENERS" ]]; then
    if [[ -n "$KAFKA_ADVERTISED_LISTENERS" ]]; then
        echo "ERROR: Missing environment variable KAFKA_LISTENERS. Must be specified when using KAFKA_ADVERTISED_LISTENERS"
        exit 1
    elif [[ -z "$HOSTNAME_VALUE" ]]; then
        echo "ERROR: No listener or advertised hostname configuration provided in environment."
        echo "       Please define KAFKA_LISTENERS / (deprecated) KAFKA_ADVERTISED_HOST_NAME"
        exit 1
    fi

    # Maintain existing behaviour
    # If HOSTNAME_COMMAND is provided, set that to the advertised.host.name value if listeners are not defined.
    export KAFKA_ADVERTISED_HOST_NAME="$HOSTNAME_VALUE"
fi

# Issue newline to config file in case there is not one already
echo "" >> "$KAFKA_HOME/config/server.properties"

(
    function updateConfig() {
        key=$1
        value=$2
        file=$3

        # Omit $value here, in case there is sensitive information
        echo "[Configuring] '$key' in '$file'"

        # If config exists in file, replace it. Otherwise, append to file.
        if grep -E -q "^#?$key=" "$file"; then
            sed -r -i "s@^#?$key=.*@$key=$value@g" "$file" # note that no config values may contain an '@' char
        else
            echo "$key=$value" >> "$file"
        fi
    }

    # Fixes #312
    # KAFKA_VERSION + KAFKA_HOME + grep -rohe KAFKA[A-Z0-9_]* /opt/kafka/bin | sort | uniq | tr '\n' '|'
    EXCLUSIONS="|KAFKA_VERSION|KAFKA_HOME|KAFKA_DEBUG|KAFKA_GC_LOG_OPTS|KAFKA_HEAP_OPTS|KAFKA_JMX_OPTS|KAFKA_JVM_PERFORMANCE_OPTS|KAFKA_LOG|KAFKA_OPTS|"

    # Read in env as a new-line separated array. This handles the case of env variables having spaces and/or carriage returns. See #313
    IFS=$'\n'
    for VAR in $(env)
    do
        env_var=$(echo "$VAR" | cut -d= -f1)
        if [[ "$EXCLUSIONS" = *"|$env_var|"* ]]; then
            echo "Excluding $env_var from broker config"
            continue
        fi

        if [[ $env_var =~ ^KAFKA_ ]]; then
            kafka_name=$(echo "$env_var" | cut -d_ -f2- | tr '[:upper:]' '[:lower:]' | tr _ .)
            updateConfig "$kafka_name" "${!env_var}" "$KAFKA_HOME/config/server.properties"
        fi

        if [[ $env_var =~ ^LOG4J_ ]]; then
            log4j_name=$(echo "$env_var" | tr '[:upper:]' '[:lower:]' | tr _ .)
            updateConfig "$log4j_name" "${!env_var}" "$KAFKA_HOME/config/log4j.properties"
        fi
    done
)

if [[ -n "$CUSTOM_INIT_SCRIPT" ]] ; then
    eval "$CUSTOM_INIT_SCRIPT"
fi

exec "$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties"
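
The configuration loop above strips the KAFKA_ prefix, lower-cases the rest, and turns underscores into dots before writing the value to server.properties, for example (illustrative values):

    KAFKA_ADVERTISED_HOST_NAME=kafka   ->  advertised.host.name=kafka
    KAFKA_LOG_RETENTION_HOURS=168      ->  log.retention.hours=168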

7
kafka/arm/versions.sh Executable file

@@ -0,0 +1,7 @@
#!/bin/bash -e
MAJOR_VERSION=$(echo "$KAFKA_VERSION" | cut -d. -f1)
export MAJOR_VERSION
MINOR_VERSION=$(echo "$KAFKA_VERSION" | cut -d. -f2)
export MINOR_VERSION
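
With the Dockerfile's default KAFKA_VERSION=1.1.1 this yields MAJOR_VERSION=1 and MINOR_VERSION=1, which the other scripts use for version-specific branches; a quick check:

    $ KAFKA_VERSION=1.1.1 bash -c 'source /usr/bin/versions.sh; echo "$MAJOR_VERSION.$MINOR_VERSION"'
    1.1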