data loss while sending from fluentd to aws kinesis firehose - fluentd

We are using fluentd to send logs to AWS Kinesis Firehose. Every now and then, we see that a few records are not delivered to Firehose.
Here are our settings in fluentd.
# Global Fluentd settings.
<system>
  log_level info
</system>

# Tail the application log files and emit each parsed JSON line with the
# tag "tracylog".
<source>
  @type tail
  path "/var/log/app/tracy.log*"
  # Read positions are persisted here so tailing can resume after a restart.
  pos_file "/var/tmp/tracy.log.pos"
  pos_file_compaction_interval 72h
  # Per-plugin log level override for this input.
  @log_level "error"
  tag "tracylog"
  <parse>
    @type "json"
    time_key False
  </parse>
</source>

# Expose Fluentd's monitoring endpoint on localhost only.
<source>
  @type monitor_agent
  bind 127.0.0.1
  port 24220
</source>

# Ship everything tagged "tracylog" to the Kinesis Firehose delivery stream.
<match tracylog>
  @type "kinesis_firehose"
  region "${awsRegion}"
  delivery_stream_name "${delivery_stream_name}"
  # Empty section: credentials are taken from the instance profile.
  <instance_profile_credentials>
  </instance_profile_credentials>
  <buffer>
    # Frequency of ingestion
    flush_interval 30s
    flush_thread_count 4
    chunk_limit_size 1m
  </buffer>
</match>

A few changes in the config fixed my issue:
# Global Fluentd settings.
<system>
  log_level info
</system>

# Tail the application log files and emit each parsed JSON line with the
# tag "tracylog".
<source>
  @type tail
  path "/var/log/app/tracy.log*"
  pos_file "/var/tmp/tracy.log.pos"
  pos_file_compaction_interval 72h
  # Start reading newly discovered files from their beginning instead of
  # only picking up lines appended after discovery.
  read_from_head true
  # Track files by inode; in_tail's documentation recommends this when
  # `path` contains a wildcard, as it does here, to handle rotation.
  follow_inodes true
  @log_level "error"
  tag "tracylog"
  <parse>
    @type "json"
    time_key False
  </parse>
</source>

# Expose Fluentd's monitoring endpoint on localhost only.
<source>
  @type monitor_agent
  bind 127.0.0.1
  port 24220
</source>

# Ship everything tagged "tracylog" to the Kinesis Firehose delivery stream.
<match tracylog>
  @type "kinesis_firehose"
  region "${awsRegion}"
  delivery_stream_name "${delivery_stream_name}"
  # Empty section: credentials are taken from the instance profile.
  <instance_profile_credentials>
  </instance_profile_credentials>
  <buffer>
    # Flush much more aggressively than the original 30s interval, with
    # more flush threads that poll the queue at short intervals.
    flush_interval 2
    flush_thread_interval 0.1
    flush_thread_burst_interval 0.01
    flush_thread_count 8
  </buffer>
</match>

Related

Fluentd: Copy logs locally as well as push them to Loki

I am using fluentd for log aggregation. My use case is I want to collect logs and store them in a single file as a backup as well as push it to loki to view in grafana.
Following is the config file:
# Receive events forwarded by the remote servers.
<source>
  @type forward
  port 10091
  bind 0.0.0.0
  format none
</source>

# Drop unwanted keys and add a "service" field taken from the second tag part.
<filter *.**>
  @type record_transformer
  enable_ruby true
  remove_keys source,container_id, container_name , ts, tsNs
  <record>
    service ${tag_parts[1]}
    # message ${record["msg"] ? record["msg"] : record["message"] ? record["message"] : record["MESSAGE"]}
  </record>
</filter>

# SERVER1 events: copy to Loki and to an hourly local file.
<match SERVER1.*>
  @type copy
  <store>
    @type loki
    url "http://LOKI_IP:PORT"
    flush_interval 1s
    flush_at_shutdown true
    buffer_chunk_limit 1m
    extra_labels {"agent":"SERVER1"}
    <label>
      filename
    </label>
  </store>
  <store>
    @type file
    path /fluentd/Log/SERVER1.%Y-%m-%d.%H:%M:%S.log
    <buffer time>
      timekey 1h
      timekey_use_utc true
      timekey_wait 2s
      flush_interval 1h
    </buffer>
  </store>
</match>

# SERVER2 events: same layout as SERVER1, with its own label and file name.
<match SERVER2.*>
  @type copy
  <store>
    @type loki
    url "http://LOKI_IP:PORT"
    flush_interval 1s
    flush_at_shutdown true
    buffer_chunk_limit 1m
    extra_labels {"agent":"SERVER2"}
    <label>
      filename
    </label>
  </store>
  <store>
    @type file
    path /fluentd/Log/SERVER2.%Y-%m-%d.%H:%M:%S.log
    <buffer time>
      timekey 1h
      timekey_use_utc true
      timekey_wait 2s
      flush_interval 1h
    </buffer>
  </store>
</match>
Here SERVER1 and SERVER2 are sending logs to fluentd. I tried collecting logs from both servers in single file by adding
# Receive events forwarded by the remote servers.
<source>
  @type forward
  port 10091
  bind 0.0.0.0
  format none
</source>

# Drop unwanted keys and add a "service" field taken from the second tag part.
<filter *.**>
  @type record_transformer
  enable_ruby true
  remove_keys source,container_id, container_name , ts, tsNs
  <record>
    service ${tag_parts[1]}
    # message ${record["msg"] ? record["msg"] : record["message"] ? record["message"] : record["MESSAGE"]}
  </record>
</filter>

# Catch-all: write events from every server into one local file.
# NOTE(review): Fluentd evaluates <match> sections in file order and an event
# is consumed by the first one whose pattern matches, so this block also
# takes the SERVER1.* / SERVER2.* events and the Loki matches below never
# receive them.
<match *.**>
  @type copy
  <store>
    @type file
    path /fluentd/Log/access.%Y-%m-%d.%H:%M:%S.log
    <buffer time>
      timekey 10s
      timekey_use_utc true
      timekey_wait 2s
      flush_interval 10s
    </buffer>
  </store>
</match>

# SERVER1 events: push to Loki.
<match SERVER1.*>
  @type copy
  <store>
    @type loki
    url "http://LOKI_IP:PORT"
    flush_interval 1s
    flush_at_shutdown true
    buffer_chunk_limit 1m
    extra_labels {"agent":"SERVER1"}
    <label>
      filename
    </label>
  </store>
</match>

# SERVER2 events: push to Loki.
<match SERVER2.*>
  @type copy
  <store>
    @type loki
    url "http://LOKI_IP:PORT"
    flush_interval 1s
    flush_at_shutdown true
    buffer_chunk_limit 1m
    extra_labels {"agent":"SERVER2"}
    <label>
      filename
    </label>
  </store>
</match>
But with this, I am not able to see the latest logs in loki/grafana. Somehow it is only saving it locally and not sending it to loki.
So it's like SAVE IT LOCALLY or PUSH IT TO LOKI. I want both of these functionalities to have a single JSON file to have logs from both servers instead of different files from different servers. Can anyone help me out if it's possible at all?

create index for elasticsearch as namespaces names

I'm using Elasticsearch (Open Distro) with fluentd and I want to collect my Kubernetes cluster logs, with the logs of each namespace going to its own index. I'm looking at this answer but am still having problems. I also added Fluentd-${record['kubernetes']['namespace_name']}, but it did not resolve to my namespaces.
I'm using this conf for the source:
## logs from podman
# Tail all container log files and route the events into the @KUBERNETES
# label for further processing.
<source>
  @type tail
  @id in_tail_container_logs
  @label @KUBERNETES
  path /var/log/containers/*.log
  pos_file /var/log/fluentd-containers.log.pos
  tag kubernetes.*
  read_from_head true
  <parse>
    # Try each <pattern> in order until one parses the line.
    @type multi_format
    <pattern>
      # JSON lines carrying their timestamp in a "time" field.
      format json
      time_key time
      time_type string
      time_format "%Y-%m-%dT%H:%M:%S.%NZ"
      keep_time_key false
    </pattern>
    <pattern>
      # Plain-text lines of the form "<time> <stdout|stderr> [<flag>] <log>".
      format regexp
      expression /^(?<time>.+) (?<stream>stdout|stderr)( (.))? (?<log>.*)$/
      time_format '%Y-%m-%dT%H:%M:%S.%NZ'
      keep_time_key false
    </pattern>
  </parse>
  emit_unmatched_lines true
</source>
and about filters.conf
# Processing pipeline for events routed to the @KUBERNETES label.
<label @KUBERNETES>
  # Send Fluentd's own container logs to a separate label.
  <match kubernetes.var.log.containers.fluentd**>
    @type relabel
    @label @FLUENT_LOG
  </match>

  # Enrich the remaining events with Kubernetes metadata.
  <filter kubernetes.**>
    @type kubernetes_metadata
    @id filter_kube_metadata
  </filter>

  # Parse the "log" field as JSON when possible, otherwise keep it as raw
  # text; the other fields are kept and the original "log" key is removed.
  <filter kubernetes.**>
    @id filter_parser
    @type parser
    key_name log
    reserve_data true
    remove_key_name_field true
    <parse>
      @type multi_format
      <pattern>
        format json
      </pattern>
      <pattern>
        format none
      </pattern>
    </parse>
  </filter>

  # Hand everything over to the @OUTPUT label.
  <match **>
    @type relabel
    @label @OUTPUT
  </match>
</label>
and finally in output
04_outputs.conf: |-
# Final output stage: send every event to Elasticsearch.
<label @OUTPUT>
  <match **>
    @type elasticsearch
    host myhost
    port 9200
    user myuser
    password mypass
    scheme https
    ssl_verify false
    # NOTE(review): placeholders in output parameters are resolved from the
    # buffer chunk keys, not from a `record` object. With the chunk key
    # declared below, the documented form is presumably
    # ${$.kubernetes.namespace_name} (to be confirmed). Elasticsearch index
    # names must also be lowercase.
    logstash_prefix Fluentd-${record['kubernetes']['namespace_name']}
    logstash_format true
    # Chunk by tag and by namespace.
    <buffer tag, $.kubernetes.namespace_name>
      flush_thread_count 8
      flush_interval 5s
      chunk_limit_size 2M
      queue_limit_length 32
      retry_max_interval 30
      retry_forever true
    </buffer>
  </match>
</label>
But there is still nothing in the index.
I was recently working on a fluent-bit -> fluentd -> opensearch setup so just putting my solution here.
In my case, I was also getting the literal ${record['kubernetes']['namespace_name']} as my index instead of the actual namespace (tried different variations like accessor pattern, with or without quotes, double/single etc but didn't work). If you do not need the tag, you can use it to pass the index name by rewriting it:
# Replace each event's tag with its Kubernetes namespace name so that the
# output can reference it through ${tag}.
<match kube.**>
  @type rewrite_tag_filter
  <rule>
    key $['kubernetes']['namespace_name']
    # Capture the whole (non-empty) namespace name as $1.
    pattern ^(.+)$
    tag $1
  </rule>
</match>
And on your output,
# Logstash-style index naming, prefixed with "fluentd-" plus the event tag.
logstash_format true
logstash_prefix fluentd-${tag}
Hope it helps even though this can be considered a hack.

fluentd localtime is working for stdout, but not elasticsearch

I'm tailing a syslog file which doesn't have the timezone. By default fluentd (incorrectly) assumes the timezone is UTC, so it shifts the time off by several hours.
I can fix this for stdout, using 'localtime true', but I can't find a setting to do the same thing for elasticsearch:
# Tail a syslog-formatted file.
<source>
  @type tail
  # read_from_head true
  <parse>
    @type syslog
  </parse>
  path /tmp/syslog
  pos_file /tmp/var_log_syslog.pos
  tag syslog.file
</source>

# Send syslog events to Elasticsearch and also print them to stdout.
<match syslog.**>
  @type copy
  <store>
    @type elasticsearch
    host elasticsearch
    port 9200
    logstash_format true
    logstash_prefix fluentd
    logstash_dateformat %Y%m%d
    # Store the event tag in the record under the key "@log_name".
    include_tag_key true
    type_name access_log
    tag_key @log_name
    flush_interval 1s
    utc_index false
  </store>
  <store>
    @type stdout
    localtime true
  </store>
</match>
It looks like the desired behavior is the default behavior. Fluentd seems to use the localtime zone, but I was running it in a docker container and I forgot to set the container's timezone.

Using a single source in fluentd with different match types

So I am trying to capture the output from docker containers running on a host but after a change by the developers to use json as a logging output for the containers I am missing out on the containers start up message that are happening in the entrypoint.sh. I can see that someone has added a new filter section in the config file which works really nicely to capture json output but only json output.
Here is the template in use:
# Receive events from the Docker fluentd logging driver.
<source>
  @type forward
  port 24224
  bind 0.0.0.0
  tag GELF_TAG
</source>

# Parse the "log" field as JSON and replace the record with the parsed
# result (reserve_data false drops the original fields).
<filter GELF_TAG.**>
  @type parser
  key_name log
  reserve_data false
  <parse>
    @type json
  </parse>
</filter>

# Forward to Graylog over GELF/TCP and echo to stdout.
<match GELF_TAG.**>
  @type copy
  <store>
    @type gelf
    # Filled in by the Ansible template.
    host {{ graylog_server_fqdn }}
    port 12201
    protocol tcp
    flush_interval 5s
  </store>
  <store>
    @type stdout
  </store>
</match>
How do I set up the config to be able to capture the entrypoint.sh output and the json output from the containers after they start?
EDIT.
The filter is rejecting messages sent to the docker containers stdout up until the application starts logging in json.
[warn]: #0 dump an error event: error_class=Fluent::Plugin::Parser::ParserError error="pattern not matched with data
So I tried to capture everything that was being dropped into the ERROR tag and I can see the missing messages but they still fail to parse using this config:
# Ansible
# Receive events from the Docker fluentd logging driver.
<source>
  @type forward
  port 24224
  bind 0.0.0.0
  tag GELF_TAG
</source>

# Parse the "log" field as JSON; records that fail to parse are emitted as
# error events.
<filter GELF_TAG.**>
  @type parser
  emit_invalid_record_to_error true
  key_name log
  reserve_data false
  <parse>
    @type json
  </parse>
</filter>

# Attempt to catch both the normal events and the error events.
# NOTE(review): @ERROR is a label, not a tag; error events are normally
# handled in a separate <label @ERROR> section — confirm against the
# Fluentd routing documentation.
<match {GELF_TAG.**,@ERROR}>
  @type copy
  <store>
    @type gelf
    # Filled in by the Ansible template.
    host {{ graylog_server_fqdn }}
    port 12201
    protocol tcp
    flush_interval 5s
  </store>
  <store>
    @type stdout
  </store>
</match>
Install the multi-format parser:
# Install version 1.0.0 of the fluent-plugin-multi-format-parser gem using td-agent's gem command.
td-agent-gem install fluent-plugin-multi-format-parser -v 1.0.0
# Ansible
# Receive events from the Docker fluentd logging driver.
<source>
  @type forward
  port 24224
  bind 0.0.0.0
  tag GELF_TAG
</source>

# Parse the "log" field: try JSON first and fall back to keeping the raw
# line, so non-JSON startup output is no longer rejected.
<filter GELF_TAG.**>
  @type parser
  key_name log
  reserve_data false
  <parse>
    @type multi_format
    <pattern>
      format json
      time_key timestamp
    </pattern>
    <pattern>
      format none
    </pattern>
  </parse>
</filter>

# Forward to Graylog over GELF/TCP and echo to stdout.
<match GELF_TAG.**>
  @type copy
  <store>
    @type gelf
    # Filled in by the Ansible template.
    host {{ graylog_server_fqdn }}
    port 12201
    protocol tcp
    flush_interval 5s
  </store>
  <store>
    @type stdout
  </store>
</match>
You can also use the 'rewrite_tag_filter' which is an output plugin. Using that you can change the tag for the different patterns, and then use the parsers/filters.

Fluentd logging driver sends unstructured log message

My environment has a setup where docker container logs are forwarded to fluentd, then fluentd forwards to splunk.
I have an issue with fluentd: some of the docker container logs are not in a structured format. From the documentation I see that:
fluentd log driver sends the following metadata in the structured log message:
container_id,
container_name,
source,
log
My issue is few of the logs have unstructured metadata information:
for example:
Log 1:
{"log":"2019/03/12 13:59:49 [info] 6#6: *2425596 client closed connection while waiting for request, client: 10.17.84.12, server: 0.0.0.0:80","container_id":"789459f8f8a52c8b4b","container_name":"testingcontainer-1ed-fwij4-EcsTaskDefinition-1TF1DH","source":"stderr"}
Log 2:
{"container_id":"26749a26500dd04e92fc","container_name":"/4C4DTHQR2V6C-EcsTaskDefinition-1908NOZPKPKY0-1","source":"stdout","log":"\u001B[0mGET \u001B[32m200 \u001B[0m0.634 ms - -\u001B[0m"}
These two logs have a different order of metadata fields (log 1: [log, container_id, container_name, source]; log 2: [container_id, container_name, source, log]). Because of this I'm getting some issues in Splunk. How can I resolve this so that the metadata fields always come in the same order?
My fluentd config file is:
# Receive Docker logs and route them into the @mainstream label.
<source>
  @type forward
  @id input1
  @label @mainstream
  @log_level trace
  port 24224
</source>

<label @mainstream>
  # Copy every event to a local file and to S3.
  <match *.**>
    @type copy
    <store>
      @type file
      @id output_docker1
      path /fluentd/log/docker.*.log
      symlink_path /fluentd/log/docker.log
      append true
      time_slice_format %Y%m%d
      time_slice_wait 1m
      time_format %Y%m%dT%H%M%S%z
      utc
      buffer_chunk_limit 512m
    </store>
    <store>
      @type s3
      @id output_docker2
      @log_level trace
      s3_bucket bucketwert-1
      s3_region us-east-1
      path logs/
      buffer_path /fluentd/log/docker.log
      s3_object_key_format %{path}%{time_slice}_sbx_docker_%{index}.%{file_extension}
      flush_interval 3600s
      time_slice_format %Y%m%d
      time_format %Y%m%dT%H%M%S%z
      utc
      buffer_chunk_limit 512m
    </store>
  </match>
</label>
How about fluent-plugin-record-sort?
Or you can use the built-in record_transformer plugin like the following if you know all keys in a record:
# Generate two sample records whose keys appear in different orders.
<source>
  @type dummy
  tag dummy
  dummy [
    {"log": "log1", "container_id": "123", "container_name": "name1", "source": "stderr"},
    {"container_id": "456", "container_name": "name2", "source": "stderr", "log": "log2"}
  ]
</source>

# Rebuild each record from scratch, keeping only the listed keys.
<filter dummy>
  @type record_transformer
  renew_record true
  keep_keys log,container_id,container_name,source
</filter>

# Print the resulting records.
<match dummy>
  @type stdout
</match>
UPDATE(not tested):
# Receive Docker logs and route them into the @mainstream label.
<source>
  @type forward
  @id input1
  @label @mainstream
  @log_level trace
  port 24224
</source>

<label @mainstream>
  # Rebuild each record from scratch, keeping only the listed keys, before
  # the events reach the outputs.
  <filter>
    @type record_transformer
    renew_record true
    keep_keys log,container_id,container_name,source
  </filter>

  # Copy every event to a local file and to S3.
  <match *.**>
    @type copy
    <store>
      @type file
      @id output_docker1
      path /fluentd/log/docker.*.log
      symlink_path /fluentd/log/docker.log
      append true
      time_slice_format %Y%m%d
      time_slice_wait 1m
      time_format %Y%m%dT%H%M%S%z
      utc
      buffer_chunk_limit 512m
    </store>
    <store>
      @type s3
      @id output_docker2
      @log_level trace
      s3_bucket bucketwert-1
      s3_region us-east-1
      path logs/
      buffer_path /fluentd/log/docker.log
      s3_object_key_format %{path}%{time_slice}_sbx_docker_%{index}.%{file_extension}
      flush_interval 3600s
      time_slice_format %Y%m%d
      time_format %Y%m%dT%H%M%S%z
      utc
      buffer_chunk_limit 512m
    </store>
  </match>
</label>

Resources