How to deploy elasticsearch with docker swarm? - docker

I created 3 virtual machines using docker-machine; they are:
NAME ACTIVE DRIVER STATE URL SWARM DOCKER ERRORS
cluster - virtualbox Running tcp://192.168.99.101:2376 v18.09.5
cluster2 - virtualbox Running tcp://192.168.99.102:2376 v18.09.5
master - virtualbox Running tcp://192.168.99.100:2376 v18.09.5
and then I create a docker swarm in master machine:
# Initialise the swarm on the master VM, advertising its 192.168.99.100 address.
# (The flag is --advertise-addr; the original "----advertise-addr" is not a valid option.)
docker-machine ssh master "docker swarm init --advertise-addr 192.168.99.100"
and in cluster and cluster2 join master:
docker-machine ssh cluster "docker swarm join --advertise-addr 192.168.99.101 --token xxxx 192.168.99.100:2377"
docker-machine ssh cluster2 "docker swarm join --advertise-addr 192.168.99.102 --token xxxx 192.168.99.100:2377"
the docker node ls info:
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
r4a6y9wie4zp3pl4wi4e6wqp8 cluster Ready Active 18.09.5
sg9gq6s3k6vty7qap7co6eppn cluster2 Ready Active 18.09.5
xb6telu8cn3bfmume1kcektkt * master Ready Active Leader 18.09.5
there is deploy config swarm.yml:
# swarm.yml - the stack file from the question: one Elasticsearch service
# deployed as three replicated tasks on the "backend" network.
version: "3.3"

services:
  elasticsearch:
    image: elasticsearch:7.0.0
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - cluster.name=elk
      # Bind and publish on the container's eth1 IPv4 address.
      - network.host=_eth1:ipv4_
      - network.bind_host=_eth1:ipv4_
      - network.publish_host=_eth1:ipv4_
      # Discovery is pointed at two of the VM addresses.
      - discovery.seed_hosts=192.168.99.100,192.168.99.101
      - cluster.initial_master_nodes=192.168.99.100,192.168.99.101
      - bootstrap.memory_lock=false
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    networks:
      - backend
    deploy:
      mode: replicated
      replicas: 3
      #endpoint_mode: dnsrr
      restart_policy:
        condition: none
      resources:
        limits:
          cpus: "1.0"
          memory: "1024M"
        reservations:
          memory: 20M

networks:
  backend:
    # driver: overlay
    # attachable: true
I pulled the elasticsearch image onto each virtual machine:
docker-machine ssh master "docker image pull elasticsearch:7.0.0"
docker-machine ssh cluster "docker image pull elasticsearch:7.0.0"
docker-machine ssh cluster2 "docker image pull elasticsearch:7.0.0"
Before deploying, I ran these commands to fix an Elasticsearch bootstrap error:
docker-machine ssh master "sudo sysctl -w vm.max_map_count=262144"
docker-machine ssh cluster "sudo sysctl -w vm.max_map_count=262144"
docker-machine ssh cluster2 "sudo sysctl -w vm.max_map_count=262144"
and then I ran `docker stack deploy -c swarm.yml es`, but the Elasticsearch cluster does not work.
docker-machine ssh master
docker service logs es_elasticsearch -f
show:
es_elasticsearch.1.uh1x0s9qr7mb#cluster | {"type": "server", "timestamp": "2019-04-25T16:28:47,143+0000", "level": "WARN", "component": "o.e.c.c.ClusterFormationFailureHelper", "cluster.name": "elk", "node.name": "e8dba5562417", "message": "master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and this node must discover master-eligible nodes [192.168.99.100, 192.168.99.101] to bootstrap a cluster: have discovered []; discovery will continue using [192.168.99.100:9300, 192.168.99.101:9300] from hosts providers and [{e8dba5562417}{Jy3t0AAkSW-jY-IygOCjOQ}{z7MYIf5wTfOhCX1r25wNPg}{10.255.0.46}{10.255.0.46:9300}{ml.machine_memory=1037410304, xpack.installed=true, ml.max_open_jobs=20}] from last-known cluster state; node term 0, last-accepted version 0 in term 0" }
es_elasticsearch.2.swswlwmle9e9#cluster2 | {"type": "server", "timestamp": "2019-04-25T16:28:47,389+0000", "level": "WARN", "component": "o.e.c.c.ClusterFormationFailureHelper", "cluster.name": "elk", "node.name": "af5d88a04b42", "message": "master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and this node must discover master-eligible nodes [192.168.99.100, 192.168.99.101] to bootstrap a cluster: have discovered []; discovery will continue using [192.168.99.100:9300, 192.168.99.101:9300] from hosts providers and [{af5d88a04b42}{zhxMeNMAQN2evKDlsA33qA}{fpYPTvJ6STmyqrgxlMkD_w}{10.255.0.47}{10.255.0.47:9300}{ml.machine_memory=1037410304, xpack.installed=true, ml.max_open_jobs=20}] from last-known cluster state; node term 0, last-accepted version 0 in term 0" }
es_elasticsearch.3.x8ouukovhh80#master | {"type": "server", "timestamp": "2019-04-25T16:28:48,818+0000", "level": "WARN", "component": "o.e.c.c.ClusterFormationFailureHelper", "cluster.name": "elk", "node.name": "0e7e4d96b31a", "message": "master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and this node must discover master-eligible nodes [192.168.99.100, 192.168.99.101] to bootstrap a cluster: have discovered []; discovery will continue using [192.168.99.100:9300, 192.168.99.101:9300] from hosts providers and [{0e7e4d96b31a}{Xs9966RjTEWvEbuj4-ySYA}{-eV4lvavSHq6JhoW0qWu6A}{10.255.0.48}{10.255.0.48:9300}{ml.machine_memory=1037410304, xpack.installed=true, ml.max_open_jobs=20}] from last-known cluster state; node term 0, last-accepted version 0 in term 0" }
I guess the cluster formation failure may be due to a network configuration error. I don't know how to fix it; I have modified the config many times, and it fails every time.

try, this is working :) docker-compose.yml
# docker-compose.yml - one Elasticsearch task per swarm node (mode: global,
# DNS round-robin endpoint) with an nginx service publishing port 9200.
version: "3.7"

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.2.0
    # Container hostname and ES node name are both templated from the swarm
    # node's hostname.
    hostname: "{{.Node.Hostname}}"
    environment:
      - node.name={{.Node.Hostname}}
      - cluster.name=my-cluster
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      # Seed discovery via the service name.
      - discovery.seed_hosts=elasticsearch
      # These are compared with node.name, i.e. the swarm node hostnames.
      - cluster.initial_master_nodes=node1,node2,node3
      - node.ml=false
      - xpack.ml.enabled=false
      - xpack.monitoring.enabled=false
      - xpack.security.enabled=false
      - xpack.watcher.enabled=false
      - bootstrap.memory_lock=false
    volumes:
      - elasticsearch-data:/usr/share/elasticsearch/data
    deploy:
      mode: global
      endpoint_mode: dnsrr
      resources:
        limits:
          memory: 4G

  nginx:
    image: nginx:1.17.1-alpine
    ports:
      - "9200:9200"
    deploy:
      mode: global
    # Writes nginx.conf at container start, validates it, then runs nginx in
    # the foreground. "$$" is the compose-file escape for a literal "$".
    command: |
      /bin/sh -c "echo '
      user nobody nogroup;
      worker_processes auto;
      events {
      worker_connections 1024;
      }
      http {
      client_max_body_size 4g;
      resolver 127.0.0.11 ipv6=off;
      server {
      listen *:9200;
      location / {
      proxy_set_header Connection keep-alive;
      set $$url http://elasticsearch:9200;
      proxy_pass $$url;
      proxy_set_header Host $$http_host;
      proxy_set_header X-Real-IP $$remote_addr;
      proxy_set_header X-Forwarded-For $$proxy_add_x_forwarded_for;
      }
      }
      }' | tee /etc/nginx/nginx.conf && nginx -t && nginx -g 'daemon off;'"

volumes:
  elasticsearch-data:

Trying to manually specify all the specific IP's and bindings is tricky because of the swarm overlaying network.
Instead, simply make your ES nodes discoverable and let Swarm take care of the node discovery and communication. To make them discoverable, we can use a predictable name like the Swarm node hostname.
Try change your environment settings in the swarm.yml file as follows:
# Replacement environment settings for the elasticsearch service in swarm.yml.
environment:
- network.host=0.0.0.0
- discovery.seed_hosts=elasticsearch # Service name, so that Swarm handles discovery
- cluster.initial_master_nodes=master,cluster,cluster2 # Host names of the swarm nodes
- node.name={{.Node.Hostname}} # Gives each ES node a predictable name (its swarm node's host name)
This of course assumes that we already know the swarm hostnames, which you pointed out in the screenshot above. Without knowing these values, we would have no way of having a predictable set of node names to look for. In that case, you could create 1 ES node entry with a particular node name, and then another entry which references the first entry's node name as the cluster.initial_master_nodes.

Use dnsrr mode without ports. Expose elasticsearch with nginx ;)
See my docker-compose.yml

In my experience https://github.com/shazChaudhry/docker-elastic works perfectly, and just one file from the entire repo is enough. I downloaded https://github.com/shazChaudhry/docker-elastic/blob/master/docker-compose.yml and removed the logstash bits, I didn't need that. Then added the following to .bashrc
# Settings added to .bashrc; presumably consumed as ${...} substitutions by the
# downloaded docker-compose.yml - confirm against that file.
export ELASTICSEARCH_HOST=$(hostname)
# NOTE(review): plain-text placeholder password; change "foobar" for real use.
export ELASTICSEARCH_PASSWORD=foobar
export ELASTICSEARCH_USERNAME=elastic
export ELASTIC_VERSION=7.4.2
# The only initial master node is this host.
export INITIAL_MASTER_NODES=$ELASTICSEARCH_HOST
And docker stack deploy --compose-file docker-compose.yml elastic works.

Ideas I gleaned from Ahmet Vehbi Olgaç 's docker-compose.yml, which worked for me:
Use deployment / mode: global. This will cause the swarm to deploy one replica to each swarm worker, for each node that is configured like this.
Use deployment / endpoint_mode: dnsrr. This will let all containers in the swarm access the nodes by the service name.
Use hostname: {{.Node.Hostname}} or a similar template-based expression. This ensures a unique name for each deployed container.
Use environment / node.name={{.Node.Hostname}}. Again, you can vary the pattern. The point is that each es node should get a unique name.
Use cluster.initial_master_nodes=*hostname1*,*hostname2*,.... Assuming you know the hostnames of your docker worker machines. Use whatever pattern you used in #3, but substitute out the whole hostname, and include all the hostnames.
If you don't know your hostnames, you can do what Andrew Cachia's answer suggests: set up one container (do not replicate it) to act solely as the master seed and give it a predictable hostname, then have all other nodes refer to that node as the master seed. However, this introduces a single point of failure.

Elasticsearch 8.5.0 answer.
For my needs, I didn't want to add a reverse-proxy/load balancer, but I do want to expose port 9200 on the swarm nodes where Elasticsearch replicas are running (using just swarm), so that external clients can access the Elasticsearch REST API. So I used endpoint mode dnsrr (ref) and exposed port 9200 on the hosts where the replicas run.
If you don't need to expose port 9200 (i.e., nothing will connect to the elasticsearch replicas outside of swarm), remove the ports: config from the elasticsearch service.
I also only want elasticsearch replicas to run on a subset of my swarm nodes (3 of them). I created docker node label elasticsearch on those three nodes. Then mode: global and constraint node.labels.elasticsearch==True will ensure 1 replica runs on each of those nodes.
I run kibana on one of those 3 nodes too: swarm can pick which one, since port 5601 is exposed on swarm's ingress overlay network.
Lines you'll likely need to edit are marked with ######.
# docker network create -d overlay --attachable elastic-net
# cat elastic-stack-env
#!/bin/bash
# Environment for elastic-stack.yml. Source this file (". ./elastic-stack-env")
# before "docker stack deploy" so that the ${...} references in the stack file
# are substituted from the environment.
export STACK_VERSION=8.5.0 # Elasticsearch and Kibana version
export ES_PORT=9200 # port to expose Elasticsearch HTTP API to the host
export KIBANA_PORT=5601 # port to expose Kibana to the host
# Prompt without echoing the password (-s) and without backslash processing
# (-r), then export: a plain `read` only sets a shell variable, which a child
# process such as `docker stack deploy` would not see.
read -r -s -p "Enter elastic user password: " ELASTIC_PASSWORD; echo
read -r -s -p "Enter kibana_system user password: " KIBANA_PASSWORD; echo
export ELASTIC_PASSWORD KIBANA_PASSWORD
export KIBANA_URL=https://kibana.my-domain.com:$KIBANA_PORT #######
export SHARED_DIR=/some/nfs/or/shared/storage/elastic #######
export KIBANA_SSL_KEY_PATH=config/certs/kibana.key
export KIBANA_SSL_CERT_PATH=config/certs/kibana.crt
export ELASTIC_NODES=swarm_node1,swarm_node2,swarm_node3 #######
# ELASTIC_NODES must match what docker reports from {{.Node.Hostname}}
export KIBANA_SSL_CERT_AUTH_PATH=config/certs/My_Root_CA.crt #######
export CLUSTER_NAME=docker-cluster
export MEM_LIMIT=4294967296 # 4 GB; increase or decrease based on the available host memory (in bytes)
# cat elastic-stack.yml
# elastic-stack.yml - Elasticsearch (one task per labelled swarm node), Kibana,
# and a one-shot "setup" job that sets the kibana_system password.
version: "3.8"

services:
  elasticsearch:
    image: localhost:5000/elasticsearch:${STACK_VERSION:?}  ####### I have a local registry
    deploy:
      endpoint_mode: dnsrr
      mode: global  # but note constraints below
      placement:
        constraints:
          - node.labels.elasticsearch==True
      resources:
        limits:
          memory: ${MEM_LIMIT}
    dns: 127.0.0.11  # use docker DNS only (may not be required)
    networks:
      - elastic-net
    volumes:
      - ${SHARED_DIR:?}/certs:/usr/share/elasticsearch/config/certs
      - /path/to/some/local/storage/elasticsearch:/usr/share/elasticsearch/data
    ports:  ##### remove if nothing outside of swarm needs to access port 9200
      - target: 9200
        published: ${ES_PORT}  # we publish this port so that external clients can access the ES REST API
        protocol: tcp
        mode: host  # required when using dnsrr
    environment:
      # https://www.elastic.co/guide/en/elasticsearch/reference/master/settings.html
      # https://www.elastic.co/guide/en/elasticsearch/reference/master/docker.html#docker-configuration-methods
      - node.name={{.Node.Hostname}}  # see Andrew Cachia's answer
      - cluster.name=${CLUSTER_NAME}
      # use service name here, since (docker's) DNS is used:
      # https://www.elastic.co/guide/en/elasticsearch/reference/current/important-settings.html#unicast.hosts
      - discovery.seed_hosts=elasticsearch
      # use node.names here
      # https://www.elastic.co/guide/en/elasticsearch/reference/current/important-settings.html#initial_master_nodes
      - cluster.initial_master_nodes=${ELASTIC_NODES}
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/elasticsearch/elasticsearch.key
      - xpack.security.http.ssl.certificate=certs/elasticsearch/elasticsearch.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.http.ssl.verification_mode=certificate
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/elasticsearch/elasticsearch.key
      - xpack.security.transport.ssl.certificate=certs/elasticsearch/elasticsearch.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
      - xpack.license.self_generated.type=basic
    healthcheck:
      # Healthy once the HTTPS endpoint answers with the "missing
      # authentication credentials" response.
      test:
        - "CMD-SHELL"
        - "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'"
      interval: 10s
      timeout: 10s
      retries: 120
    logging:  # we use rsyslog
      driver: syslog
      options:
        syslog-facility: "local2"

  kibana:
    # this service depends on the setup service (defined below), but docker stack has no
    # way to specify dependencies, but more importantly, there's been a move away from this:
    # https://stackoverflow.com/a/47714157/215945
    image: localhost:5000/kibana:${STACK_VERSION:?}  ######
    hostname: kibana
    deploy:
      placement:
        constraints:
          - node.labels.elasticsearch==True  # run KB on any one of the ES nodes
      resources:
        limits:
          memory: ${MEM_LIMIT}
    dns: 127.0.0.11  # use docker DNS only (may not be required)
    networks:
      - elastic-net
    volumes:
      - ${SHARED_DIR:?}/kibana:/usr/share/kibana/data
      - ${SHARED_DIR:?}/certs:/usr/share/kibana/config/certs
    ports:
      - ${KIBANA_PORT}:5601
    environment:
      # https://www.elastic.co/guide/en/kibana/master/settings.html
      # https://www.elastic.co/guide/en/kibana/master/docker.html#environment-variable-config
      # CAPS_WITH_UNDERSCORES must be used with Kibana
      - SERVER_NAME=kibana
      - ELASTICSEARCH_HOSTS=["https://elasticsearch:9200"]
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD}
      - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
      - SERVER_PUBLICBASEURL=${KIBANA_URL}
      # if you don't want to use https/TLS with Kibana, comment-out
      # the next four lines
      - SERVER_SSL_ENABLED=true
      - SERVER_SSL_KEY=${KIBANA_SSL_KEY_PATH}
      - SERVER_SSL_CERTIFICATE=${KIBANA_SSL_CERT_PATH}
      - SERVER_SSL_CERTIFICATEAUTHORITIES=${KIBANA_SSL_CERT_AUTH_PATH}
      - TELEMETRY_OPTIN=false
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -sIk https://localhost:5601 | grep -q 'HTTP/1.1 302 Found'"
      interval: 10s
      timeout: 10s
      retries: 120
    logging:
      driver: syslog
      options:
        syslog-facility: "local2"

  setup:
    image: localhost:5000/elasticsearch:${STACK_VERSION:?}  #######
    deploy:
      placement:
        constraints:
          - node.labels.elasticsearch==True
      restart_policy:  # https://docs.docker.com/compose/compose-file/compose-file-v3/#restart_policy
        condition: none
    volumes:
      - ${SHARED_DIR:?}/certs:/usr/share/elasticsearch/config/certs
    dns: 127.0.0.11  # use docker DNS only (may not be required)
    networks:
      - elastic-net
    # Waits for Elasticsearch to answer over HTTPS, then retries setting the
    # kibana_system password until the API returns "{}". The script lines are
    # indented deeper than "bash -c" so the folded scalar keeps their newlines.
    command: >
      bash -c '
        until curl -s --cacert config/certs/ca/ca.crt https://elasticsearch:9200 | grep -q "missing authentication credentials"
        do
          echo "waiting 30 secs for Elasticsearch availability..."
          sleep 30
        done
        echo "setting kibana_system password"
        until curl -s -X POST --cacert config/certs/ca/ca.crt -u elastic:${ELASTIC_PASSWORD} -H "Content-Type: application/json" https://elasticsearch:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"
        do
          echo "waiting 10 secs before trying to set password again..."
          sleep 10
        done
        echo "done"
      '
    logging:
      driver: syslog
      options:
        syslog-facility: "local2"

networks:
  elastic-net:
    external: true
Deploy:
# . ./elastic-stack-env
# docker stack deploy -c elastic-stack.yml elastic
# # ... after Kibana comes up, you can remove the setup service if you want:
# docker service rm elastic_setup
Here's how I created the Elasticsearch CA and cert:
# cat elastic-certs.yml
# elastic-certs.yml - one-shot stack that creates the CA and the Elasticsearch
# certificate under the shared certs directory, then idles until removed.
version: "3.8"

services:
  setup:
    image: localhost:5000/elasticsearch:${STACK_VERSION:?}  #######
    volumes:
      - ${SHARED_DIR:?}/certs:/usr/share/elasticsearch/config/certs
    user: "0:0"
    # The existence checks test config/certs/*.zip, the same paths the zips are
    # written to, so a re-run skips generation instead of recreating the CA.
    # Script lines are indented deeper than "bash -c" so the folded scalar
    # keeps their newlines.
    command: >
      bash -c '
        if [ ! -f config/certs/ca.zip ]; then
          echo "Creating CA";
          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
          unzip config/certs/ca.zip -d config/certs;
        fi;
        if [ ! -f config/certs/certs.zip ]; then
          echo "Creating certs";
          echo -ne \
          "instances:\n"\
          "  - name: elasticsearch\n"\
          "    dns:\n"\
          "      - elasticsearch\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          > config/certs/instances.yml;
          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
          unzip config/certs/certs.zip -d config/certs;
          echo "Setting file permissions"
          chown -R root:root config/certs;
          find . -type d -exec chmod 750 \{\} \;;
          find . -type f -exec chmod 640 \{\} \;;
        fi;
        sleep infinity
      '
    healthcheck:
      # Healthy once the Elasticsearch certificate file exists.
      test: ["CMD-SHELL", "[ -f config/certs/elasticsearch/elasticsearch.crt ]"]
      interval: 1s
      timeout: 5s
      retries: 120
# . ./elastic-stack-env
# docker stack deploy -c elastic-certs.yml elastic-certs
# # ... ensure files are created under $SHARED_DIR/certs, then
# docker stack rm elastic-certs
How I created the Kibana cert is outside the scope of this question.
I run a Fluent Bit swarm service (mode: global, docker network elastic-net) to send logs to the elasticsearch service. Although outside the scope of this question, here's the salient config:
# Fluent Bit output section: sends matching records to host "elasticsearch"
# on port 9200 over TLS, authenticating as the "fluentbit" user.
[OUTPUT]
name es
match <whatever is appropriate for you here>
host elasticsearch
port 9200
index my-index-default
http_user fluentbit
http_passwd ${FLUENTBIT_PASSWORD}
tls on
tls.ca_file /certs/ca/ca.crt
tls.crt_file /certs/elasticsearch/elasticsearch.crt
tls.key_file /certs/elasticsearch/elasticsearch.key
retry_limit false
suppress_type_name on
# trace_output on
Host elasticsearch will be resolved by docker's DNS server to the three IP addresses of the elasticsearch replicas, so there is no single point of failure.

Related

Running ELK on docker, Kibana says: Unable to retrieve version information from Elasticsearch nodes

I was referring to example given in the elasticsearch documentation for starting elastic stack (elastic and kibana) on docker using docker compose. It gives example of docker compose version 2.2 file. So, I tried to convert it to docker compose version 3.8 file. Also, it creates three elastic nodes and has security enabled. I want to keep it minimal to start with. So I tried to turn off security and also reduce the number of elastic nodes to 2. This is how my current compose file looks like:
# Compose file from the question: a single Elasticsearch node with security
# disabled, plus Kibana.
version: "3.8"

services:
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.0.0-amd64
    volumes:
      - esdata01:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"
    environment:
      - node.name=es01
      - cluster.name=docker-cluster
      - cluster.initial_master_nodes=es01
      - bootstrap.memory_lock=true
      - xpack.security.enabled=false
    deploy:
      resources:
        limits:
          memory: 1g
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      # [
      #   "CMD-SHELL",
      #   # "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
      # ]
      # Changed to:
      test:
        - "CMD-SHELL"
        - "curl -f http://localhost:9200 || exit 1"
      interval: 10s
      timeout: 10s
      retries: 120

  kibana:
    depends_on:
      - es01
    image: docker.elastic.co/kibana/kibana:8.0.0-amd64
    volumes:
      - kibanadata:/usr/share/kibana/data
    ports:
      - "5601:5601"
    environment:
      - SERVERNAME=kibana
      - ELASTICSEARCH_HOSTS=https://localhost:9200
    deploy:
      resources:
        limits:
          memory: 1g
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -s -I http://localhost:5601 | grep -q 'HTTP/1.1 302 Found'"
      interval: 10s
      timeout: 10s
      retries: 120

volumes:
  esdata01:
    driver: local
  kibanadata:
    driver: local
Then, I tried to run it:
docker stack deploy -c docker-compose.nosec.noenv.yml elk
Creating network elk_default
Creating service elk_es01
Creating service elk_kibana
When I tried to check their status, it displayed following:
$ docker container list
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
3dcd08134e38 docker.elastic.co/kibana/kibana:8.0.0-amd64 "/bin/tini -- /usr/l…" 3 minutes ago Up 3 minutes (health: starting) 5601/tcp elk_kibana.1.ng8aspz9krfnejfpsnqzl2sci
7b548a43c45c docker.elastic.co/elasticsearch/elasticsearch:8.0.0-amd64 "/bin/tini -- /usr/l…" 3 minutes ago Up 3 minutes (healthy) 9200/tcp, 9300/tcp elk_es01.1.d9a107j6wkz42shti3n6kpfmx
I noticed that kibana's status gets stuck at (health: starting). When I checked Kibana's logs with command docker service logs -f elk_kibana, it had following WARN and ERROR lines:
[WARN ][plugins.security.config] Generating a random key for xpack.security.encryptionKey. To prevent sessions from being invalidated on restart, please set xpack.security.encryptionKey in the kibana.yml or use the bin/kibana-encryption-keys command.
[WARN ][plugins.security.config] Session cookies will be transmitted over insecure connections. This is not recommended.
[WARN ][plugins.security.config] Generating a random key for xpack.security.encryptionKey. To prevent sessions from being invalidated on restart, please set xpack.security.encryptionKey in the kibana.yml or use the bin/kibana-encryption-keys command.
[WARN ][plugins.security.config] Session cookies will be transmitted over insecure connections. This is not recommended.
[WARN ][plugins.reporting.config] Generating a random key for xpack.reporting.encryptionKey. To prevent sessions from being invalidated on restart, please set xpack.reporting.encryptionKey in the kibana.yml or use the bin/kibana-encryption-keys command.
[WARN ][plugins.reporting.config] Found 'server.host: "0.0.0.0"' in Kibana configuration. Reporting is not able to use this as the Kibana server hostname. To enable PNG/PDF Reporting to work, 'xpack.reporting.kibanaServer.hostname: localhost' is automatically set in the configuration. You can prevent this message by adding 'xpack.reporting.kibanaServer.hostname: localhost' in kibana.yml.
[ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 127.0.0.1:9200
It seems that kibana is not able to connect with Elasticsearch, but why? Is it because of disabling of security and that we cannot have security disabled?
PS-1: Earlier, when I set elasticsearch host as follows in kibana's environment in the docker compose file:
ELASTICSEARCH_HOSTS=https://es01:9200 # that is 'es01' instead of `localhost`
it gave me following error:
[ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. getaddrinfo ENOTFOUND es01
So, after checking this question, I changed es01 to localhost as specified earlier (that is in complete docker compose file content before PS-1.)
PS-2: Replacing localhost with 192.168.0.104 gives following error
[ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.0.104:9200
[ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. write EPROTO 140274197346240:error:1408F10B:SSL routines:ssl3_get_record:wrong version number:../deps/openssl/openssl/ssl/record/ssl3_record.c:332:
Try this :
ELASTICSEARCH_HOSTS=http://es01:9200
I don't know why it runs on my PC, since Elasticsearch is supposed to use SSL. But in your case, using http works just fine.

Got issue in acessing url from outside docker container

I have a docker container inside which Prometheus metrics is running on port 127.0.0.1:9615
I want to access those metrics from my host machine, so I did the port binding 0.0.0.0:9615->9615. But I am still not able to reach them: curl localhost:9615/metrics gives me this response:
curl: (56) Recv failure: Connection reset by peer
My docker-compose file looks like that
# Compose file from the question: publishes the p2p, RPC, WebSocket and 9615
# ports of a polkadot node.
version: '2'

services:
  polkadot:
    container_name: polkadot
    image: parity/polkadot
    ports:
      - "30333:30333"  # p2p port
      - "9933:9933"  # rpc port
      - "9944:9944"  # ws port
      - "9615:9615"
    command:
      - "--name"
      - "PolkaDocker"
      - "--ws-external"
      - "--rpc-external"
      - "--rpc-cors"
      - "all"
What mistake am I doing?
After pulling down your docker-compose.yaml it seems like you were just missing one additional CLI flag --prometheus-external.
Updated docker-compose.yaml:
# Updated compose file: same as the question's, plus --prometheus-external so
# the metrics endpoint listens on all interfaces instead of only locally.
version: '2'

services:
  polkadot:
    container_name: polkadot
    image: parity/polkadot
    ports:
      - "30333:30333"  # p2p port
      - "9933:9933"  # rpc port
      - "9944:9944"  # ws port
      - "9615:9615"
    command:
      - "--name"
      - "PolkaDocker"
      - "--ws-external"
      - "--rpc-external"
      - "--rpc-cors"
      - "all"
      - "--prometheus-external"  # NEW FLAG HERE
Now if you hit localhost:9615/metrics you should see data:
# HELP polkadot_block_height Block height info of the chain
# TYPE polkadot_block_height gauge
polkadot_block_height{status="best"} 0
polkadot_block_height{status="finalized"} 0
# HELP polkadot_block_verification_and_import_time Time taken to verify and import blocks
# TYPE polkadot_block_verification_and_import_time histogram
polkadot_block_verification_and_import_time_bucket{le="0.005"} 1076
...
Based on the CLI polkadot --help the flag is described like so:
$ polkadot --help
polkadot 0.9.8-3a10ee63c-x86_64-linux-gnu
Parity Technologies <admin#parity.io>
Polkadot Relay-chain Client Node
USAGE:
polkadot [FLAGS] [OPTIONS]
polkadot <SUBCOMMAND>
FLAGS:
...
--prometheus-external
Listen to all Prometheus data source interfaces.
Default is local.

How to solve "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK" and "Cluster address must be set when using raft storage" errors?

I use this following stack file to deploy the vault service in docker swarm mode.
The stack file:
# Stack file for a single-replica Vault server using raft storage and an
# awskms seal, pinned to a swarm manager node.
version: '3.8'

services:
  faume-vault:
    image: vault:1.6.0
    environment:
      TZ: UTC
      VAULT_ADDR: 'http://0.0.0.0:8200'
      # Vault configuration as JSON. The "access_key" and "secret_key" entries
      # now end with commas; without them the document is not valid JSON.
      # NOTE(review): AWS credentials are embedded in this file; keep them out
      # of version control.
      VAULT_LOCAL_CONFIG: |-
        {
          "disable_cache": true,
          "disable_mlock": true,
          "ui": true,
          "backend": {
            "raft": {
              "node_id": "vault",
              "path": "/vault"
            }
          },
          "default_lease_ttl": "168h",
          "max_lease_ttl": "720h",
          "seal": {
            "awskms": {
              "access_key": "xxxxxxxxxxxxxxxxxxxxxxx",
              "secret_key": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
              "kms_key_id": "xxxxxxxxxxxxxxxxxxxxxxx"
            }
          },
          "listener": {
            "tcp": {
              "address": "0.0.0.0:8200",
              "cluster_address": "0.0.0.0:8201",
              "tls_disable": true
            }
          },
          "cluster_addr": "http://vault.faume.local:8201",
          "api_addr": "http://vault.faume.local:8200"
        }
    command: server
    volumes:
      - 'faume-vault:/vault'
    ports:
      - '8200:8200'
    networks:
      faume:
        # Alias matching the host used in cluster_addr / api_addr above.
        aliases:
          - vault.faume.local
    deploy:
      mode: replicated
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.role==manager

volumes:
  faume-vault:

networks:
  faume:
When deploy the stack file, the service is created without errors, but service does not run.
Service logs are as following:
faume-vault_faume-vault.1.xztddsvjaa2c#DevOps-Dhanushka | Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --privileged or --cap-add IPC_LOCK
faume-vault_faume-vault.1.xkh8abjlzyel#DevOps-Dhanushka | Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --privileged or --cap-add IPC_LOCK
faume-vault_faume-vault.1.xkh8abjlzyel#DevOps-Dhanushka | 2021-07-26T08:20:15.705Z [INFO] proxy environment: http_proxy= https_proxy= no_proxy=
faume-vault_faume-vault.1.xztddsvjaa2c#DevOps-Dhanushka | 2021-07-26T08:20:31.553Z [INFO] proxy environment: http_proxy= https_proxy= no_proxy=
faume-vault_faume-vault.1.xkh8abjlzyel#DevOps-Dhanushka | Cluster address must be set when using raft storage
faume-vault_faume-vault.1.xztddsvjaa2c#DevOps-Dhanushka | Cluster address must be set when using raft storage
faume-vault_faume-vault.1.5g7wzqm7fn0f#DevOps-Dhanushka | Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --privileged or --cap-add IPC_LOCK
faume-vault_faume-vault.1.5g7wzqm7fn0f#DevOps-Dhanushka | 2021-07-26T08:20:23.070Z [INFO] proxy environment: http_proxy= https_proxy= no_proxy=
faume-vault_faume-vault.1.5g7wzqm7fn0f#DevOps-Dhanushka | Cluster address must be set when using raft storage
faume-vault_faume-vault.1.kf0k9eoou749#DevOps-Dhanushka | Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --privileged or --cap-add IPC_LOCK
faume-vault_faume-vault.1.kf0k9eoou749#DevOps-Dhanushka | Cluster address must be set when using raft storage
faume-vault_faume-vault.1.kf0k9eoou749#DevOps-Dhanushka | 2021-07-26T08:20:39.894Z [INFO] proxy environment: http_proxy= https_proxy= no_proxy=
It seems error is "mlock". But I have configure "mlock" variable successfully. Can you please, make some suggestions?
You need to run your faume-vault container in privileged mode.
Just add privileged: true option.
This is supported by docker swarm only in recent releases (see privileged mode in docker compose in a swarm), so please ensure you are running a recent enough version.
Please make sure you understand the security concerns involved with running privileged pods.

Docker swarm : can't curl to a service container

I ve a service running under a stack swarm :
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
de74ba4d48c1 myregistry/myApi:1.0 "java -Dfile.encodin…" 3 minutes ago Up 3 minutes 8300/tcp myApiCtn
As you can see , my service is running on the 8300 port.
The problem is that when I run curl, it does not seem to reply:
[user#server home]$ curl http://localhost:8300/api/elk/batch
curl: (52) Empty reply from server
In another side if i ran my container manually (without stack and without swarm services )
(docker run ...)
-> curl works well
My docker-compose is the following :
---
# Stack file from the question: the api-batch service runs in global mode on
# nodes labelled type == test and publishes port 8300.
version: '3.4'

services:
  api-batch:
    image: myRegistry/myImageApi
    stdin_open: true
    tty: true
    networks:
      - net_common
      - default
    volumes:
      - /opt/application/current/logs:/opt/application/current/logs
      - /var/opt/data/flat/flf/:/var/opt/data/flat/flf/
    ports:
      - target: 8300
        published: 8300
        protocol: tcp
    deploy:
      mode: global
      resources:
        limits:
          memory: 1024M
      placement:
        constraints:
          - node.labels.type == test
    healthcheck:
      disable: true

networks:
  net_common:
    external: true
Where my networks list is the following :
NETWORK ID NAME DRIVER SCOPE
17795bfee9ca bridge bridge local
0faecb070730 docker_gwbridge bridge local
51c34d251495 host host local
j2nnf26asn3k ingress overlay swarm
3all3tmn3qn9 net_common overlay swarm
b7alw2yi5fk9 srcd-current_default overlay swarm
Any suggestion to make it work under swarm service ?

How does service discovery work with modern docker/docker-compose?

I'm using Docker 1.11.1 and docker-compose 1.8.0-rc2.
In the good old days (so, last year), you could set up a docker-compose.yml file like this:
# Legacy (version-1 style) compose file: frontend is linked to app.
app:
  image: myapp

frontend:
  image: myfrontend
  links:
    - app
And then start up the environment like this:
docker scale app=3 frontend=1
And your frontend container could inspect the environment variables
for variables named APP_1_PORT, APP_2_PORT, etc to discover the
available backend hosts and configure itself accordingly.
Times have changed. Now, we do this...
# Version-2 compose file: same two services, now nested under "services".
version: '2'

services:
  app:
    image: myapp

  frontend:
    image: myfrontend
    links:
      - app
...and instead of environment variables, we get DNS. So inside the
frontend container, I can ask for app_app_1 or app_app_2 or
app_app_3 and get the corresponding ip address. I can also ask for
app and get the address of app_app_1.
But how do I discover all of the available backend containers? I
guess I could loop over getent hosts ... until it fails:
# Probe app_1, app_2, ... via getent until a lookup fails, collecting every
# name that resolved into $backends.
counter=1
while getent hosts "app_$counter"; do
  backends="$backends app_$counter"
  counter=$((counter + 1))
done
But that seems ugly and fragile.
I've heard rumors about round-robin dns, but (a) that doesn't seem to
be happening in my test environment, and (b) that doesn't necessarily
help if your frontend needs simultaneous connections to the backends.
How is simple container and service discovery meant to work in the
modern Docker world?
Docker's built-in Nameserver & Loadbalancer
Docker comes with a built-in nameserver. The server is, by default, reachable via 127.0.0.11:53.
Every container has by default a nameserver entry in /etc/resolv.conf, so it is not required to specify the address of the nameserver from within the container. That is why you can find your service from within the network with service or task_service_n.
If you do task_service_n then you will get the address of the corresponding service replica.
If you only ask for the service, docker will perform internal load balancing between containers in the same network and external load balancing to handle requests from outside.
When swarm is used, docker will additionally use two special networks.
The ingress network, which is actually an overlay network and handles incoming traffic to the swarm. It allows querying any service from any node in the swarm.
The docker_gwbridge, a bridge network, which connects the overlay networks (including ingress) of the individual hosts to their physical network.
When using swarm to deploy services, the behavior as described in the examples below will not work unless endpoint_mode is set to DNS round-robin (dnsrr) instead of vip.
endpoint_mode: vip - Docker assigns the service a virtual IP (VIP) that acts as the front end for clients to reach the service on a network. Docker routes requests between the client and available worker nodes for the service, without client knowledge of how many nodes are participating in the service or their IP addresses or ports. (This is the default.)
endpoint_mode: dnsrr - DNS round-robin (DNSRR) service discovery does not use a single virtual IP. Docker sets up DNS entries for the service such that a DNS query for the service name returns a list of IP addresses, and the client connects directly to one of these. DNS round-robin is useful in cases where you want to use your own load balancer, or for Hybrid Windows and Linux applications.
Example
For example deploy three replicas from dig/docker-compose.yml
# dig/docker-compose.yml: three replicas of the whoami demo service.
version: '3.8'
services:
  whoami:
    image: "traefik/whoami"
    deploy:
      replicas: 3
DNS Lookup
You can use tools such as dig or nslookup to do a DNS lookup against the nameserver in the same network.
docker run --rm --network dig_default tutum/dnsutils dig whoami
; <<>> DiG 9.9.5-3ubuntu0.2-Ubuntu <<>> whoami
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 58433
;; flags: qr rd ra; QUERY: 1, ANSWER: 3, AUTHORITY: 0, ADDITIONAL: 0
;; QUESTION SECTION:
;whoami. IN A
;; ANSWER SECTION:
whoami. 600 IN A 172.28.0.3
whoami. 600 IN A 172.28.0.2
whoami. 600 IN A 172.28.0.4
;; Query time: 0 msec
;; SERVER: 127.0.0.11#53(127.0.0.11)
;; WHEN: Mon Nov 16 22:36:37 UTC 2020
;; MSG SIZE rcvd: 90
If you are only interested in the IP, you can provide the +short option
docker run --rm --network dig_default tutum/dnsutils dig +short whoami
172.28.0.3
172.28.0.4
172.28.0.2
Or look for specific service
docker run --rm --network dig_default tutum/dnsutils dig +short dig_whoami_2
172.28.0.4
Load balancing
The default load balancing happens on the transport layer, or layer 4 of the OSI model, so it is TCP/UDP based. That means it is not possible to inspect and manipulate HTTP headers with this method. In the enterprise edition it is apparently possible to use labels similar to the ones Traefik is using in the example a bit further down.
docker run --rm --network dig_default curlimages/curl -Ls http://whoami
Hostname: eedc94d45bf4
IP: 127.0.0.1
IP: 172.28.0.3
RemoteAddr: 172.28.0.5:43910
GET / HTTP/1.1
Host: whoami
User-Agent: curl/7.73.0-DEV
Accept: */*
Here is the hostname from 10 times curl:
Hostname: eedc94d45bf4
Hostname: 42312c03a825
Hostname: 42312c03a825
Hostname: 42312c03a825
Hostname: eedc94d45bf4
Hostname: d922d86eccc6
Hostname: d922d86eccc6
Hostname: eedc94d45bf4
Hostname: 42312c03a825
Hostname: d922d86eccc6
Health Checks
Health checks, by default, are done by checking the process id (PID) of the container on the host kernel. If the process is running successfully, the container is considered healthy.
Oftentimes other health checks are required. The container may be running but the application inside has crashed. In many cases a TCP or HTTP check is preferred.
It is possible to bake custom health checks into images. For example, using curl to perform L7 health checks.
# Extend the whoami image with an HTTP (L7) health check.
FROM traefik/whoami
# The check fails (exit 1) when the HTTP request to localhost does not succeed.
# NOTE(review): assumes curl is available in the base image — confirm.
HEALTHCHECK CMD curl --fail http://localhost || exit 1
It is also possible to specify the health check via cli when starting the container.
# Same health check configured at container start; every health option
# carries the --health- prefix (docker run has no plain --timeout flag).
docker run \
  --health-cmd "curl --fail http://localhost || exit 1" \
  --health-interval=5s \
  --health-timeout=3s \
  traefik/whoami
Example with Swarm
As initially mentioned, swarm's behavior is different in that it will assign a virtual IP to services by default. Actually, it is not different: docker or docker-compose simply doesn't create real services; it just imitates the behavior of swarm but still runs the containers normally, as services can, in fact, only be created by manager nodes.
Keeping in mind we are on a swarm manager and thus the default mode is VIP
Create an overlay network that can be used by regular containers too
$ docker network create --driver overlay --attachable testnet
create some service with 2 replicas
$ docker service create --network testnet --replicas 2 --name digme nginx
Now let's use dig again, making sure we attach the container to the same network
$ docker run --network testnet --rm tutum/dnsutils dig digme
digme. 600 IN A 10.0.18.6
We see that indeed we only got one IP address back, so it appears that this is the virtual IP that has been assigned by docker.
Swarm actually allows getting the individual IPs in this case without explicitly setting the endpoint mode.
We can query for tasks.<servicename> in this case that is tasks.digme
$ docker run --network testnet --rm tutum/dnsutils dig tasks.digme
tasks.digme. 600 IN A 10.0.18.7
tasks.digme. 600 IN A 10.0.18.8
This has brought us 2 A records pointing to the individual replicas.
Now let's create another service with the endpoint mode set to DNS round-robin
docker service create --endpoint-mode dnsrr --network testnet --replicas 2 --name digme2 nginx
$ docker run --network testnet --rm tutum/dnsutils dig digme2
digme2. 600 IN A 10.0.18.21
digme2. 600 IN A 10.0.18.20
This way we get both IPs without adding the prefix tasks.
Service Discovery & Loadbalancing Strategies
If the built-in features are not sufficient, some strategies can be implemented to achieve better control. Below are some examples.
HAProxy
HAProxy can use the docker nameserver in combination with dynamic server templates to discover the running containers. Then the traditional proxy features can be leveraged to achieve powerful layer 7 load balancing with HTTP header manipulation and chaos engineering such as retries.
# HAProxy in front of three whoami replicas.
version: '3.8'
services:
  loadbalancer:
    image: haproxy
    volumes:
      # Mount the HAProxy configuration read-only.
      - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
    ports:
      - "80:80"
      - "443:443"
  whoami:
    image: "traefik/whoami"
    deploy:
      replicas: 3
...
# Resolver section pointing HAProxy at Docker's built-in nameserver.
resolvers docker
# 127.0.0.11:53 is the address of the nameserver Docker provides to containers.
nameserver dns1 127.0.0.11:53
resolve_retries 3
timeout resolve 1s
timeout retry 1s
# Per-status hold periods, all set to 10s.
hold other 10s
hold refused 10s
hold nx 10s
hold timeout 10s
hold valid 10s
hold obsolete 10s
...
# Backend for the whoami service with retries and cookie-based stickiness.
backend whoami
balance leastconn
option httpchk
option redispatch 1
retry-on all-retryable-errors
retries 2
# POST requests are excluded from layer 7 retries.
http-request disable-l7-retry if METH_POST
dynamic-cookie-key MY_SERVICES_HASHED_ADDRESS
cookie MY_SERVICES_HASHED_ADDRESS insert dynamic
# Template for 6 servers prefixed "whoami-", resolved from the name whoami:80
# through the "docker" resolvers section; 6 is the hard-coded replica limit.
server-template whoami- 6 whoami:80 check resolvers docker init-addr libc,none
...
Traefik
The previous method is already pretty decent. However, you may have noticed that it requires knowing which services should be discovered, and the number of replicas to discover is hard-coded. Traefik, a container-native edge router, solves both problems. As long as we enable Traefik via label, the service will be discovered. This decentralizes the configuration. It is as if each service registers itself.
The label can also be used to inspect and manipulate http headers.
# Traefik v2 discovering the whoami replicas through container labels.
version: "3.8"
services:
  traefik:
    image: "traefik:v2.3"
    command:
      - "--log.level=DEBUG"
      - "--api.insecure=true"
      - "--providers.docker=true"
      # Only containers labelled traefik.enable=true are exposed.
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:80"
    ports:
      - "80:80"
      - "8080:8080"
    volumes:
      # Read-only access to the Docker API for discovery.
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
  whoami:
    image: "traefik/whoami"
    labels:
      - "traefik.enable=true"
      # Traefik v2 spelling of the backend port (v1 used traefik.port).
      - "traefik.http.services.whoami.loadbalancer.server.port=80"
      - "traefik.http.routers.whoami.entrypoints=web"
      - "traefik.http.routers.whoami.rule=PathPrefix(`/`)"
      - "traefik.http.services.whoami.loadbalancer.sticky=true"
      - "traefik.http.services.whoami.loadbalancer.sticky.cookie.name=MY_SERVICE_ADDRESS"
    deploy:
      replicas: 3
Consul
Consul is a tool for service discovery and configuration management. Services have to be registered via API request. It is a more complex solution that probably only makes sense in bigger clusters, but can be very powerful. Usually it is recommended to run this on bare metal and not in a container. You could install it alongside the docker host on each server in your cluster.
In this example it has been paired with the registrator image, which takes care of registering the docker services in Consul's catalog.
The catalog can be leveraged in many ways. One of them is to use consul-template.
Note that consul comes with its own DNS resolver so in this instance the docker DNS resolver is somewhat neglected.
# Consul + registrator + a consul-template driven nginx proxy.
version: '3.8'
services:
  consul:
    image: gliderlabs/consul-server:latest
    command: "-advertise=${MYHOST} -server -bootstrap"
    container_name: consul
    hostname: ${MYHOST}
    ports:
      - "8500:8500"
  registrator:
    image: gliderlabs/registrator:latest
    command: "-ip ${MYHOST} consul://${MYHOST}:8500"
    container_name: registrator
    hostname: ${MYHOST}
    depends_on:
      - consul
    volumes:
      # Registrator reads container events from the Docker socket.
      - /var/run/docker.sock:/tmp/docker.sock
  proxy:
    # Built from the Dockerfile in this directory.
    build: .
    ports:
      - "80:80"
    depends_on:
      - consul
  whoami:
    image: "traefik/whoami"
    deploy:
      replicas: 3
    ports:
      # Container port only; no fixed host port is given.
      - "80"
Dockerfile for a custom proxy image with consul-template baked in.
# nginx image with consul-template installed to render the upstream config.
FROM nginx
# Fetch and unpack consul-template in a single layer. -f makes curl fail on
# HTTP errors instead of saving an error page as the archive.
RUN curl -fsSL https://releases.hashicorp.com/consul-template/0.25.1/consul-template_0.25.1_linux_amd64.tgz \
      | tar -xzf - \
 && mv consul-template /usr/sbin/consul-template \
 && rm /etc/nginx/conf.d/default.conf
# COPY is the idiomatic instruction for plain local files.
COPY proxy.conf.ctmpl /etc/nginx/conf.d/
COPY consul-template.hcl /
# Start nginx, then keep consul-template running as the long-lived process.
CMD [ "/bin/bash", "-c", "/etc/init.d/nginx start && consul-template -config=consul-template.hcl" ]
Consul-template takes a template file and renders it according to the content of Consul's catalog.
# Upstream rendered from the "whoami" service entries in the Consul catalog.
upstream whoami {
{{ range service "whoami" }}
    server {{ .Address }}:{{ .Port }};
{{ else }}
    # No instance registered: keep the upstream non-empty so nginx still
    # accepts the config; requests fail until a backend appears.
    server 127.0.0.1:65535;
{{ end }}
}
server {
    listen 80;
    location / {
        proxy_pass http://whoami;
    }
}
After the template has been changed, the configured command (here an nginx reload) is executed.
# Connection settings for the Consul instance that feeds the template.
consul {
address = "consul:8500"
# Retry settings for the Consul connection.
retry {
enabled = true
attempts = 12
backoff = "250ms"
}
}
# Renders the nginx proxy config from the template and reloads nginx.
template {
source = "/etc/nginx/conf.d/proxy.conf.ctmpl"
destination = "/etc/nginx/conf.d/proxy.conf"
perms = 0600
# Executed after the template has changed.
command = "/etc/init.d/nginx reload"
command_timeout = "60s"
}
Feature Table
| | Built In | HAProxy | Traefik | Consul-Template |
|---|---|---|---|---|
| Resolver | Docker | Docker | Docker | Consul |
| Service Discovery | Automatic | Server Templates | Label System | KV Store + Template |
| Health Checks | Yes | Yes | Yes | Yes |
| Load Balancing | L4 | L4, L7 | L4, L7 | L4, L7 |
| Sticky Session | No | Yes | Yes | Depends on proxy |
| Metrics | No | Stats Page | Dashboard | Dashboard |
You can view some of the code samples in more detail on github.

Resources