diff --git a/airflow/Dockerfile b/airflow/Dockerfile index 9723c5f..da562af 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -9,9 +9,6 @@ ENV AIRFLOW_EXTRAS=async,all_dbs,celery,crypto,devel_hadoop,jdbc,ldap,password,r ENV AIRFLOW_HOME=/opt/airflow ENV AIRFLOW_CONFIG=/opt/airflow/airflow.cfg -ARG FERNET_KEY=4XHGZH0dZ40iOv6z5cyfrXVg5qg3s_d06A7BFfbSsqA= -ENV FERNET_KEY=${FERNET_KEY} - RUN set -xe \ && apk add --no-cache \ build-base \ @@ -23,7 +20,7 @@ RUN set -xe \ mariadb-dev \ postgresql-dev \ python3-dev \ - && pip install cython numpy \ + && pip install cython gunicorn numpy psycopg2-binary \ && pip install apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION} \ && pip install "websocket-client<0.55.0,>=0.35" \ && apk del \ diff --git a/airflow/README.md b/airflow/README.md index 1cceb0a..acf3206 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -22,9 +22,6 @@ airflow ## Quick Start ```bash -$ python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())' -4XHGZH0dZ40iOv6z5cyfrXVg5qg3s_d06A7BFfbSsqA= - $ docker stack deploy -c docker-stack.yaml airflow $ docker service update --replicas-max-per-node=1 airflow_worker $ docker service update --replicas 3 airflow_worker @@ -33,5 +30,11 @@ $ curl http://localhost:8080/ $ curl http://localhost:5555/ ``` -> :warning: This docker image was built with a static `FERNET_KEY` environment variable. -> You should set another value to it in `docker-stack.yaml`. +> :warning: You need to prepare an NFS server that provides `airflow.cfg`. + +``` +$ python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())' +CD2wL7G0zt1SLuO4JQpLJuHtBaBEcXWKbQyvkvf2cZ8= +``` + +> :warning: You should set `fernet_key` in `airflow.cfg` to a different value to improve security. diff --git a/airflow/data/airflow.cfg b/airflow/data/airflow.cfg index 5a2d71f..b464eb0 100644 --- a/airflow/data/airflow.cfg +++ b/airflow/data/airflow.cfg @@ -1,43 +1,12 @@ -# -*- coding: utf-8 -*- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -# This is the template for Airflow's default configuration. When Airflow is -# imported, it looks for a configuration file at $AIRFLOW_HOME/airflow.cfg. If -# it doesn't exist, Airflow uses this template to generate it by replacing -# variables in curly braces with their global values from configuration.py. - -# Users should not modify this file; they should customize the generated -# airflow.cfg instead.
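The Quick Start now expects `airflow.cfg` (including its `fernet_key`) to reach every node from an NFS export rather than from a baked-in `FERNET_KEY` environment variable. A minimal sketch of dropping a freshly generated key into that shared file — the `/srv/nfs/airflow` path is an assumption, not something this change defines:

```bash
# Hypothetical helper: rotate the fernet_key in the NFS-hosted airflow.cfg.
# /srv/nfs/airflow is an assumed export path; substitute your own.
FERNET_KEY="$(python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())')"
sed -i "s|^fernet_key = .*|fernet_key = ${FERNET_KEY}|" /srv/nfs/airflow/airflow.cfg
grep '^fernet_key' /srv/nfs/airflow/airflow.cfg   # confirm the new key was written
```

Keep in mind that connection passwords already encrypted with the old key cannot be decrypted after a rotation, so this is best done before the first deployment.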
- - -# ----------------------- TEMPLATE BEGINS HERE ----------------------- - [core] # The folder where your airflow pipelines live, most likely a # subfolder in a code repository # This path must be absolute -dags_folder = {AIRFLOW_HOME}/dags +dags_folder = /opt/airflow/dags # The folder where airflow should store its log files # This path must be absolute -base_log_folder = {AIRFLOW_HOME}/logs +base_log_folder = /opt/airflow/logs # Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search. # Users must supply an Airflow connection id that provides access to the storage @@ -61,20 +30,16 @@ logging_config_class = # Log format # Colour the logs when the controlling terminal is a TTY. colored_console_log = True -colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s +colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter -log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s +log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s -# Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter -# task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{try_number}} -task_log_prefix_template = - # Log filename format -log_filename_template = {{{{ ti.dag_id }}}}/{{{{ ti.task_id }}}}/{{{{ ts }}}}/{{{{ try_number }}}}.log -log_processor_filename_template = {{{{ filename }}}}.log -dag_processor_manager_log_location = {AIRFLOW_HOME}/logs/dag_processor_manager/dag_processor_manager.log +log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log +log_processor_filename_template = {{ filename }}.log +dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log # Hostname by providing a path to a callable, which will resolve the hostname # The format is "package:function". For example, @@ -94,7 +59,7 @@ executor = CeleryExecutor # The SqlAlchemy connection string to the metadata database. # SqlAlchemy supports many different database engine, more information # their website -sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres:5432/airflow +sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres:5432/airflow # The encoding for the databases sql_engine_encoding = utf-8 @@ -122,10 +87,9 @@ sql_alchemy_max_overflow = 10 # a lower config value will allow the system to recover faster. sql_alchemy_pool_recycle = 1800 -# Check connection at the start of each connection pool checkout. -# Typically, this is a simple statement like “SELECT 1”. -# More information here: https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic -sql_alchemy_pool_pre_ping = True +# How many seconds to retry re-establishing a DB connection after +# disconnects. Setting this to 0 disables retries. +sql_alchemy_reconnect_timeout = 300 # The schema to use for the metadata database # SqlAlchemy supports databases with the concept of multiple schemas.
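With `executor = CeleryExecutor` and `sql_alchemy_conn` pointing at the `postgres` service (docker-stack.yaml is not touched by this diff, so the service keeps that name), the metadata database still has to be initialised once before the scheduler, webserver, and workers can do anything useful. A rough sketch for the 1.10-era CLI that this configuration targets — the `airflow_webserver` service name is an assumption; check `docker stack services airflow` for the real one:

```bash
# Run the one-off schema initialisation inside an already-running container of the
# stack, so it reuses the NFS-mounted airflow.cfg and the overlay network.
docker exec -it "$(docker ps -q -f name=airflow_webserver | head -n 1)" airflow initdb
```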
@@ -151,20 +115,17 @@ max_active_runs_per_dag = 16 load_examples = False # Where your Airflow plugins are stored -plugins_folder = {AIRFLOW_HOME}/plugins +plugins_folder = /opt/airflow/plugins # Secret key to save connection passwords in the db -fernet_key = {FERNET_KEY} +fernet_key = CD2wL7G0zt1SLuO4JQpLJuHtBaBEcXWKbQyvkvf2cZ8= # Whether to disable pickling dags -donot_pickle = True +donot_pickle = False -# How long before timing out a python file import +# How long before timing out a python file import while filling the DagBag dagbag_import_timeout = 30 -# How long before timing out a DagFileProcessor, which processes a dag file -dag_file_processor_timeout = 50 - # The class to use for running task instances in a subprocess task_runner = StandardTaskRunner @@ -195,8 +156,8 @@ enable_xcom_pickling = True # it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED killed_task_cleanup_time = 60 -# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow dags backfill -c` or -# `airflow dags trigger -c`, the key-value pairs will override the existing ones in params. +# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or +# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params. dag_run_conf_overrides_params = False # Worker initialisation check to validate Metadata Database connection @@ -205,9 +166,6 @@ worker_precheck = False # When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`. dag_discovery_safe_mode = True -# The number of retries each task is going to have by default. Can be overridden at dag or task level. -default_task_retries = 0 - [cli] # In what way should the cli access the API. The LocalClient will use the @@ -247,9 +205,6 @@ default_gpus = 0 [hive] # Default mapreduce queue for HiveOperator tasks default_hive_mapred_queue = -# Template for mapred_job_name in HiveOperator, supports the following named parameters: -# hostname, dag_id, task_id, execution_date -mapred_job_name_template = Airflow HiveOperator task for {{hostname}}.{{dag_id}}.{{task_id}}.{{execution_date}} [webserver] # The base url of your website as airflow cannot guess what domain or @@ -283,8 +238,7 @@ worker_refresh_batch_size = 1 worker_refresh_interval = 30 # Secret key used to run your flask app -# It should be as random as possible -secret_key = {SECRET_KEY} +secret_key = temporary_key # Number of workers to run the Gunicorn web server workers = 4 @@ -298,8 +252,25 @@ access_logfile = - error_logfile = - # Expose the configuration file in the web server +# This is only applicable for the flask-admin based web UI (non FAB-based). +# In the FAB-based web UI with RBAC feature, +# access to configuration is controlled by role permissions. expose_config = False +# Set to true to turn on authentication: +# https://airflow.apache.org/security.html#web-authentication +authenticate = False + +# Filter the list of dags by owner name (requires authentication to be enabled) +filter_by_owner = False + +# Filtering mode. Choices include user (default) and ldapgroup. +# Ldap group filtering requires using the ldap backend +# +# Note that the ldap server needs the "memberOf" overlay to be set up +# in order to user the ldapgroup mode. +owner_mode = user + # Default DAG view. 
Valid values are: # tree, graph, duration, gantt, landing_times dag_default_view = tree @@ -323,6 +294,9 @@ hide_paused_dags_by_default = False # Consistent page size across all listing views in the UI page_size = 100 +# Use FAB-based webserver with RBAC feature +rbac = False + # Define the color of navigation bar navbar_color = #007A87 @@ -362,10 +336,6 @@ smtp_ssl = False smtp_port = 25 smtp_mail_from = airflow@example.com -[sentry] -# Sentry (https://docs.sentry.io) integration -sentry_dsn = - [celery] # This section only applies if you are using the CeleryExecutor in @@ -407,7 +377,7 @@ broker_url = redis://redis:6379/1 # This status is used by the scheduler to update the state of the task # The use of a database is highly recommended # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings -result_backend = db+postgresql://airflow:airflow@postgres/airflow +result_backend = db+postgresql://airflow:airflow@postgres/airflow # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start # it `airflow flower`. This defines the IP that Celery Flower runs on @@ -487,12 +457,9 @@ job_heartbeat_sec = 5 # how often the scheduler should run (in seconds). scheduler_heartbeat_sec = 5 -# The number of times to try to schedule each DAG file -# -1 indicates unlimited number -num_runs = -1 - -# The number of seconds to wait between consecutive DAG file processing -processor_poll_interval = 1 +# after how much time should the scheduler terminate in seconds +# -1 indicates to run continuously (see also num_runs) +run_duration = -1 # after how much time (seconds) a new DAGs should be picked up from the filesystem min_file_process_interval = 0 @@ -508,7 +475,7 @@ print_stats_interval = 30 # This is used by the health check in the "/health" endpoint scheduler_health_check_threshold = 30 -child_process_log_directory = {AIRFLOW_HOME}/logs/scheduler +child_process_log_directory = /opt/airflow/logs/scheduler # Local task jobs periodically heartbeat to the DB. If the job has # not heartbeat in this many seconds, the scheduler will mark the @@ -541,11 +508,6 @@ statsd_host = localhost statsd_port = 8125 statsd_prefix = airflow -# If you want to avoid send all the available metrics to StatsD, -# you can configure an allow list of prefixes to send only the metrics that -# start with the elements of the list (e.g: scheduler,executor,dagrun) -statsd_allow_list = - # The scheduler can run multiple threads in parallel to schedule dags. # This defines how many threads will run. max_threads = 2 @@ -574,6 +536,48 @@ search_scope = LEVEL # broken schema, or do not return a schema. ignore_malformed_schema = False +[mesos] +# Mesos master address which MesosExecutor will connect to. +master = localhost:5050 + +# The framework name which Airflow scheduler will register itself as on mesos +framework_name = Airflow + +# Number of cpu cores required for running one task instance using +# 'airflow run --local -p ' +# command on a mesos slave +task_cpu = 1 + +# Memory in MB required for running one task instance using +# 'airflow run --local -p ' +# command on a mesos slave +task_memory = 256 + +# Enable framework checkpointing for mesos +# See http://mesos.apache.org/documentation/latest/slave-recovery/ +checkpoint = False + +# Failover timeout in milliseconds. +# When checkpointing is enabled and this option is set, Mesos waits +# until the configured timeout for +# the MesosExecutor framework to re-register after a failover.
Mesos +# shuts down running tasks if the +# MesosExecutor framework fails to re-register within this timeframe. +# failover_timeout = 604800 + +# Enable framework authentication for mesos +# See http://mesos.apache.org/documentation/latest/configuration/ +authenticate = False + +# Mesos credentials, if authentication is enabled +# default_principal = admin +# default_secret = admin + +# Optional Docker Image to run on slave before running the command +# This image should be accessible from mesos slave i.e mesos slave +# should be able to pull this docker image before executing the command. +# docker_image_slave = puckel/docker-airflow + [kerberos] ccache = /tmp/airflow_krb5_ccache # gets augmented with fqdn @@ -594,7 +598,7 @@ hide_sensitive_variable_fields = True # Elasticsearch host host = # Format of the log_id, which is used to query for a given tasks logs -log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}} +log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number} # Used to mark the end of a log stream for a task end_of_log_mark = end_of_log # Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id @@ -674,7 +678,7 @@ git_password = git_sync_root = /git git_sync_dest = repo # Mount point of the volume if git-sync is being used. -# i.e. {AIRFLOW_HOME}/dags +# i.e. /opt/airflow/dags git_dags_folder_mount_point = # To get Git-sync SSH authentication set up follow this format @@ -765,7 +769,9 @@ tolerations = # List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis # See: # https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py -kube_client_request_args = +# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes +# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout] +kube_client_request_args = {"_request_timeout" : [60,60] } # Worker pods security context options # See: @@ -779,17 +785,15 @@ run_as_user = # that allows for the key to be read, e.g. 65533 fs_group = -# Annotations configuration as a single line formatted JSON object. -# See the naming convention in: -# https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ -worker_annotations = - - [kubernetes_node_selectors] # The Key-value pairs to be given to worker pods. # The worker pods will be scheduled to the nodes of the specified key-value pairs. # Should be supplied in the format: key = value +[kubernetes_annotations] +# The Key-value annotations pairs to be given to worker pods. +# Should be supplied in the format: key = value + [kubernetes_environment_variables] # The scheduler sets the following environment variables into your workers. You may define as # many environment variables as needed and the kubernetes launcher will set them in the launched workers. 
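Because `airflow/data/airflow.cfg` is now a fully rendered file (hard-coded `/opt/airflow` paths, single-brace log templates, literal JSON for `kube_client_request_args`) rather than the `{AIRFLOW_HOME}`/`{{ }}` template, it is worth confirming that it still parses cleanly before publishing it on the NFS share. A small sanity check along these lines, run from the repository root; the option list is just a sample of values this diff touches:

```bash
# Parse the rendered config with stock configparser and echo a few options.
# interpolation=None keeps the %%-style log format strings from being expanded.
python - <<'EOF'
import configparser

cfg = configparser.ConfigParser(interpolation=None)
assert cfg.read('airflow/data/airflow.cfg'), 'airflow/data/airflow.cfg not found'

for section, option in [('core', 'executor'),
                        ('core', 'sql_alchemy_conn'),
                        ('celery', 'broker_url'),
                        ('celery', 'result_backend'),
                        ('webserver', 'rbac')]:
    print(section, option, '=', cfg.get(section, option))
EOF
```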
diff --git a/airflow/data/default_airflow.cfg b/airflow/data/default_airflow.cfg index ae46feb..8d9d360 100644 --- a/airflow/data/default_airflow.cfg +++ b/airflow/data/default_airflow.cfg @@ -67,10 +67,6 @@ colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatte log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s -# Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter -# task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{try_number}} -task_log_prefix_template = - # Log filename format log_filename_template = {{{{ ti.dag_id }}}}/{{{{ ti.task_id }}}}/{{{{ ts }}}}/{{{{ try_number }}}}.log log_processor_filename_template = {{{{ filename }}}}.log @@ -122,10 +118,9 @@ sql_alchemy_max_overflow = 10 # a lower config value will allow the system to recover faster. sql_alchemy_pool_recycle = 1800 -# Check connection at the start of each connection pool checkout. -# Typically, this is a simple statement like “SELECT 1”. -# More information here: https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic -sql_alchemy_pool_pre_ping = True +# How many seconds to retry re-establishing a DB connection after +# disconnects. Setting this to 0 disables retries. +sql_alchemy_reconnect_timeout = 300 # The schema to use for the metadata database # SqlAlchemy supports databases with the concept of multiple schemas. @@ -157,14 +152,11 @@ plugins_folder = {AIRFLOW_HOME}/plugins fernet_key = {FERNET_KEY} # Whether to disable pickling dags -donot_pickle = True +donot_pickle = False -# How long before timing out a python file import +# How long before timing out a python file import while filling the DagBag dagbag_import_timeout = 30 -# How long before timing out a DagFileProcessor, which processes a dag file -dag_file_processor_timeout = 50 - # The class to use for running task instances in a subprocess task_runner = StandardTaskRunner @@ -195,8 +187,8 @@ enable_xcom_pickling = True # it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED killed_task_cleanup_time = 60 -# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow dags backfill -c` or -# `airflow dags trigger -c`, the key-value pairs will override the existing ones in params. +# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or +# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params. dag_run_conf_overrides_params = False # Worker initialisation check to validate Metadata Database connection @@ -205,9 +197,6 @@ worker_precheck = False # When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`. dag_discovery_safe_mode = True -# The number of retries each task is going to have by default. Can be overridden at dag or task level. -default_task_retries = 0 - [cli] # In what way should the cli access the API. 
The LocalClient will use the @@ -247,9 +236,6 @@ default_gpus = 0 [hive] # Default mapreduce queue for HiveOperator tasks default_hive_mapred_queue = -# Template for mapred_job_name in HiveOperator, supports the following named parameters: -# hostname, dag_id, task_id, execution_date -mapred_job_name_template = Airflow HiveOperator task for {{hostname}}.{{dag_id}}.{{task_id}}.{{execution_date}} [webserver] # The base url of your website as airflow cannot guess what domain or @@ -283,8 +269,7 @@ worker_refresh_batch_size = 1 worker_refresh_interval = 30 # Secret key used to run your flask app -# It should be as random as possible -secret_key = {SECRET_KEY} +secret_key = temporary_key # Number of workers to run the Gunicorn web server workers = 4 @@ -298,8 +283,25 @@ access_logfile = - error_logfile = - # Expose the configuration file in the web server +# This is only applicable for the flask-admin based web UI (non FAB-based). +# In the FAB-based web UI with RBAC feature, +# access to configuration is controlled by role permissions. expose_config = False +# Set to true to turn on authentication: +# https://airflow.apache.org/security.html#web-authentication +authenticate = False + +# Filter the list of dags by owner name (requires authentication to be enabled) +filter_by_owner = False + +# Filtering mode. Choices include user (default) and ldapgroup. +# Ldap group filtering requires using the ldap backend +# +# Note that the ldap server needs the "memberOf" overlay to be set up +# in order to user the ldapgroup mode. +owner_mode = user + # Default DAG view. Valid values are: # tree, graph, duration, gantt, landing_times dag_default_view = tree @@ -323,6 +325,9 @@ hide_paused_dags_by_default = False # Consistent page size across all listing views in the UI page_size = 100 +# Use FAB-based webserver with RBAC feature +rbac = False + # Define the color of navigation bar navbar_color = #007A87 @@ -362,10 +367,6 @@ smtp_ssl = False smtp_port = 25 smtp_mail_from = airflow@example.com -[sentry] -# Sentry (https://docs.sentry.io) integration -sentry_dsn = - [celery] # This section only applies if you are using the CeleryExecutor in @@ -487,12 +488,9 @@ job_heartbeat_sec = 5 # how often the scheduler should run (in seconds). scheduler_heartbeat_sec = 5 -# The number of times to try to schedule each DAG file -# -1 indicates unlimited number -num_runs = -1 - -# The number of seconds to wait between consecutive DAG file processing -processor_poll_interval = 1 +# after how much time should the scheduler terminate in seconds +# -1 indicates to run continuously (see also num_runs) +run_duration = -1 # after how much time (seconds) a new DAGs should be picked up from the filesystem min_file_process_interval = 0 @@ -541,11 +539,6 @@ statsd_host = localhost statsd_port = 8125 statsd_prefix = airflow -# If you want to avoid send all the available metrics to StatsD, -# you can configure an allow list of prefixes to send only the metrics that -# start with the elements of the list (e.g: scheduler,executor,dagrun) -statsd_allow_list = - # The scheduler can run multiple threads in parallel to schedule dags. # This defines how many threads will run. max_threads = 2 @@ -574,6 +567,48 @@ search_scope = LEVEL # broken schema, or do not return a schema. ignore_malformed_schema = False +[mesos] +# Mesos master address which MesosExecutor will connect to. 
+master = localhost:5050 + +# The framework name which Airflow scheduler will register itself as on mesos +framework_name = Airflow + +# Number of cpu cores required for running one task instance using +# 'airflow run --local -p ' +# command on a mesos slave +task_cpu = 1 + +# Memory in MB required for running one task instance using +# 'airflow run --local -p ' +# command on a mesos slave +task_memory = 256 + +# Enable framework checkpointing for mesos +# See http://mesos.apache.org/documentation/latest/slave-recovery/ +checkpoint = False + +# Failover timeout in milliseconds. +# When checkpointing is enabled and this option is set, Mesos waits +# until the configured timeout for +# the MesosExecutor framework to re-register after a failover. Mesos +# shuts down running tasks if the +# MesosExecutor framework fails to re-register within this timeframe. +# failover_timeout = 604800 + +# Enable framework authentication for mesos +# See http://mesos.apache.org/documentation/latest/configuration/ +authenticate = False + +# Mesos credentials, if authentication is enabled +# default_principal = admin +# default_secret = admin + +# Optional Docker Image to run on slave before running the command +# This image should be accessible from mesos slave i.e mesos slave +# should be able to pull this docker image before executing the command. +# docker_image_slave = puckel/docker-airflow + [kerberos] ccache = /tmp/airflow_krb5_ccache # gets augmented with fqdn @@ -765,7 +800,9 @@ tolerations = # List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis # See: # https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py -kube_client_request_args = +# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes +# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout] +kube_client_request_args = {{"_request_timeout" : [60,60] }} # Worker pods security context options # See: @@ -779,17 +816,15 @@ run_as_user = # that allows for the key to be read, e.g. 65533 fs_group = -# Annotations configuration as a single line formatted JSON object. -# See the naming convention in: -# https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ -worker_annotations = - - [kubernetes_node_selectors] # The Key-value pairs to be given to worker pods. # The worker pods will be scheduled to the nodes of the specified key-value pairs. # Should be supplied in the format: key = value +[kubernetes_annotations] +# The Key-value annotations pairs to be given to worker pods. +# Should be supplied in the format: key = value + [kubernetes_environment_variables] # The scheduler sets the following environment variables into your workers. You may define as # many environment variables as needed and the kubernetes launcher will set them in the launched workers.
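The Dockerfile change adds `gunicorn` and `psycopg2-binary` on top of the pinned Airflow release, and the options restored in both config files (`run_duration`, `rbac`, `sql_alchemy_reconnect_timeout`, the `[mesos]` section) belong to the 1.10-era configuration layout. A quick, hedged way to confirm the built image matches those expectations — the `airflow:latest` tag is an assumption, and if your image defines an entrypoint the commands need to go through it:

```bash
# Report the packaged Airflow version and make sure the new Python deps import.
docker run --rm airflow:latest airflow version
docker run --rm airflow:latest python3 -c "import psycopg2, gunicorn; print('psycopg2 and gunicorn import fine')"
```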
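After `docker stack deploy -c docker-stack.yaml airflow`, a short smoke test confirms that the services actually picked up the NFS-mounted configuration. `airflow_worker` and the two published ports come from the Quick Start; the `/health` endpoint is the one referenced by `scheduler_health_check_threshold` in `airflow.cfg`:

```bash
# Post-deploy checks: replicas running, worker talking to redis/postgres, UIs answering.
docker stack services airflow
docker service logs --tail 20 airflow_worker        # the celery worker should log the redis broker
curl -fsS http://localhost:8080/health              # webserver health endpoint
curl -fsS http://localhost:5555/ > /dev/null && echo "flower is up"
```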