1
1
Fork 0
mirror of https://gitlab.archlinux.org/archlinux/infrastructure.git synced 2024-05-27 20:36:06 +02:00

Implement centralized logging

Fix #263
This commit is contained in:
Kristian Klausen 2021-02-26 22:14:05 +01:00 committed by Jelle van der Waa
parent 99d769221d
commit 7235e726d6
No known key found for this signature in database
GPG Key ID: C06086337C50773E
42 changed files with 254 additions and 1 deletions

View File

@ -111,7 +111,7 @@ Medium-fast-ish packet.net Arch Linux box.
## monitoring.archlinux.org
Prometheus and Grafana server which collects performance/metrics from our services and runs alertmanager.
Prometheus, Loki and Grafana server which collects performance/metrics and logs from our services and runs alertmanager.
### Services
- Alertmanager

View File

@ -20,3 +20,4 @@
- { role: keycloak }
- { role: borg_client, tags: ["borg"] }
- { role: fail2ban }
- { role: promtail }

View File

@ -14,3 +14,4 @@
- { role: syncarchive }
- { role: archive_web }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -38,3 +38,4 @@
- { role: archweb, archweb_planet: true }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -20,3 +20,4 @@
- { role: fail2ban }
- { role: aurweb, aurweb_domain: 'aur-dev.archlinux.org', aurweb_version: 'pu' }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -9,6 +9,7 @@
- { role: sshd, sshd_enable_includes: true }
- { role: root_ssh }
- { role: prometheus_exporters }
- { role: promtail }
- { role: certbot }
- { role: nginx }
- { role: mariadb, mariadb_query_cache_type: '0', mariadb_innodb_file_per_table: true, mariadb_innodb_buffer_pool_size: '1G' }

View File

@ -18,3 +18,4 @@
- { role: postfix, postfix_relayhost: "mail.archlinux.org" }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -18,3 +18,4 @@
- { role: postfix, postfix_relayhost: "mail.archlinux.org" }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -14,3 +14,4 @@
- { role: archbuild }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -26,3 +26,4 @@
- { role: postfix, postfix_relayhost: "mail.archlinux.org" }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -10,4 +10,5 @@
- { role: root_ssh }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }
- { role: gitlab_runner }

View File

@ -16,4 +16,5 @@
gitlab_pages_https_addresses: ['116.203.6.156:443', '[2a01:4f8:c2c:5d2d::2]:443']}
- { role: borg_client, tags: ["borg"] }
- { role: prometheus_exporters }
- { role: promtail }
- { role: fail2ban }

View File

@ -14,4 +14,5 @@
- { role: public_html, public_domain: "pkgbuild.com", tags: ['nginx'] }
- { role: borg_client, tags: ["borg"] }
- { role: prometheus_exporters }
- { role: promtail }
- { role: fail2ban }

View File

@ -29,6 +29,7 @@
- rspamd
- { role: mariadb, mariadb_query_cache_type: '0', mariadb_innodb_file_per_table: true }
- { role: prometheus_exporters }
- { role: promtail }
# luna is hosting mailman lists; this postfix role does not cater to this yet
# TODO: make postfix role handle mailman config?
# - { role: postfix, tags: ["postfix"], postfix_relayhost: "mail.archlinux.org" }

View File

@ -18,3 +18,4 @@
- { role: archusers }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -14,6 +14,7 @@
- { role: nginx }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }
- { role: postgres }
- { role: uwsgi }
- { role: archmanweb, archmanweb_version: 'v1.1' }

View File

@ -23,3 +23,4 @@
- { role: matrix }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -19,3 +19,4 @@
postgres_effective_cache_size: 1GB
- { role: hedgedoc, hedgedoc_domain: "md.archlinux.org" }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -13,4 +13,5 @@
- { role: archweb, archweb_site: false, archweb_services: false, archweb_mirrorcheck: true }
- { role: arch32_mirror, tags: ['nginx'] }
- { role: prometheus_exporters }
- { role: promtail }
- { role: fail2ban }

View File

@ -11,6 +11,8 @@
- { role: borg_client, tags: ["borg"], when: "'borg_clients' in group_names" }
- { role: prometheus }
- { role: prometheus_exporters }
- { role: loki }
- { role: promtail }
- { role: certbot }
- { role: nginx }
- { role: grafana, grafana_domain: 'monitoring.archlinux.org' }

View File

@ -20,3 +20,4 @@
- { role: patchwork }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -12,3 +12,4 @@
- { role: sshd }
- { role: root_ssh }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -18,3 +18,4 @@
postgres_effective_cache_size: 1GB
- { role: quassel, quassel_domain: "quassel.archlinux.org" }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -11,4 +11,5 @@
- { role: root_ssh }
- { role: rebuilderd_worker }
- { role: prometheus_exporters }
- { role: promtail }
- { role: fail2ban }

View File

@ -11,5 +11,6 @@
- { role: nginx }
- { role: redirects }
- { role: prometheus_exporters }
- { role: promtail }
- { role: hardening }
- { role: ping }

View File

@ -14,4 +14,5 @@
- { role: nginx }
- { role: rebuilderd }
- { role: prometheus_exporters }
- { role: promtail }
- { role: fail2ban }

View File

@ -20,3 +20,4 @@
security_tracker_dir: "/srv/http/security-tracker"
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -20,4 +20,5 @@
postgres_ssl_hosts6: ['::/0']
- { role: terraform_state }
- { role: prometheus_exporters }
- { role: promtail }
- { role: fail2ban }

View File

@ -19,3 +19,4 @@
- { role: archwiki }
- { role: fail2ban }
- { role: prometheus_exporters }
- { role: promtail }

View File

@ -5,4 +5,8 @@ datasources:
type: prometheus
access: proxy
url: http://localhost:9090
- name: Loki
type: loki
access: proxy
url: http://localhost:3100

View File

@ -2,6 +2,11 @@ upstream grafana {
server localhost:3000;
}
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 80;
listen [::]:80;
@ -36,5 +41,7 @@ server {
access_log /var/log/nginx/{{ grafana_domain }}/access.log main;
proxy_pass http://grafana;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
}

View File

@ -0,0 +1,2 @@
---
# Public hostname of the log ingestion endpoint; used for the TLS certificate,
# the nginx server_name and the nginx log directory.
logging_domain: 'logging.archlinux.org'
# htpasswd file that protects the Loki push endpoint in nginx.
loki_nginx_htpasswd: '/etc/nginx/auth/loki'

View File

@ -0,0 +1,62 @@
# Loki server configuration: single instance, in-memory ring, filesystem
# chunk storage with a boltdb-shipper index. Loki only listens on loopback.

# Enables authentication through the X-Scope-OrgID header, which must be present
# if true. If false, the OrgID will always be set to "fake".
auth_enabled: false

server:
  http_listen_address: 127.0.0.1
  http_listen_port: 3100
  grpc_listen_address: 127.0.0.1
  grpc_listen_port: 9095

ingester:
  wal:
    enabled: true
    dir: /var/lib/loki/wal
    replay_memory_ceiling: 200MB
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed
  max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h
  chunk_target_size: 1536000 # Loki will attempt to build chunks up to 1.5MB, flushing first if chunk_idle_period or max_chunk_age is reached first
  chunk_encoding: zstd
  chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
  max_transfer_retries: 0 # Chunk transfers disabled

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

storage_config:
  boltdb_shipper:
    active_index_directory: /var/lib/loki/boltdb-shipper-active
    cache_location: /var/lib/loki/boltdb-shipper-cache
    cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space
    shared_store: filesystem
  filesystem:
    directory: /var/lib/loki/chunks

compactor:
  working_directory: /var/lib/loki/boltdb-shipper-compactor
  shared_store: filesystem

limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h

chunk_store_config:
  max_look_back_period: 672h

table_manager:
  retention_deletes_enabled: true
  retention_period: 672h # 28 days

View File

@ -0,0 +1,3 @@
---
# Handler triggered via "notify: restart loki".
- name: restart loki
  service:
    name: loki
    state: restarted

36
roles/loki/tasks/main.yml Normal file
View File

@ -0,0 +1,36 @@
---
# Installs Loki, publishes its push endpoint through an nginx vhost on
# {{ logging_domain }} protected by HTTP basic auth, and starts the service.

- name: create ssl cert
  include_role:
    name: certificate
  vars:
    domains: ["{{ logging_domain }}"]

- name: install loki and logcli
  pacman:
    name:
      - loki
      - logcli
    state: present

- name: install loki configuration
  copy:
    src: loki.yaml
    dest: /etc/loki/
    owner: root
    group: root
    mode: "0644"
  notify: restart loki

# The htpasswd module below needs the passlib Python library on the target.
- name: install python-passlib
  pacman:
    name: python-passlib

- name: create htpasswd for nginx loki endpoint
  htpasswd:
    path: "{{ loki_nginx_htpasswd }}"
    name: "{{ vault_loki_nginx_user }}"
    password: "{{ vault_loki_nginx_passwd }}"
    owner: root
    group: http
    mode: "0640"

- name: make nginx log dir
  file:
    path: "/var/log/nginx/{{ logging_domain }}"
    state: directory
    owner: root
    group: root
    mode: "0755"

- name: set up nginx
  template:
    src: nginx.d.conf.j2
    dest: /etc/nginx/nginx.d/logging.conf
    owner: root
    group: root
    mode: "0644"
  notify: reload nginx
  tags: ['nginx']

- name: start and enable loki
  systemd:
    name: loki.service
    enabled: true
    daemon_reload: true
    state: started

View File

@ -0,0 +1,45 @@
# Plain HTTP: serve the Let's Encrypt snippet and redirect everything else to HTTPS.
server {
    listen       80;
    listen       [::]:80;
    server_name  {{ logging_domain }};

    access_log   /var/log/nginx/{{ logging_domain }}/access.log main;
    error_log    /var/log/nginx/{{ logging_domain }}/error.log;

    include snippets/letsencrypt.conf;

    location / {
        access_log off;
        return 301 https://$server_name$request_uri;
    }
}

# HTTPS: only the Loki push API is proxied, and only with basic auth.
server {
    listen       443 ssl http2;
    listen       [::]:443 ssl http2;
    server_name  {{ logging_domain }};

    access_log   /var/log/nginx/{{ logging_domain }}/access.log main;
    error_log    /var/log/nginx/{{ logging_domain }}/error.log;

    ssl_certificate      /etc/letsencrypt/live/{{ logging_domain }}/fullchain.pem;
    ssl_certificate_key  /etc/letsencrypt/live/{{ logging_domain }}/privkey.pem;
    ssl_trusted_certificate /etc/letsencrypt/live/{{ logging_domain }}/chain.pem;

    # Exact-match location: authenticated log ingestion, forwarded to the
    # Loki instance on loopback.
    location = /loki/api/v1/push {
        auth_basic "Loki :)";
        auth_basic_user_file {{ loki_nginx_htpasswd }};
        proxy_pass http://127.0.0.1:3100$request_uri;
    }

    # The root URL answers 200 so that monitoring probes of this host succeed.
    location = / {
        default_type text/plain;
        return 200 'Nothing to see here..\n';
    }

    # Every other path is hidden.
    location / {
        return 404;
    }
}

View File

@ -23,6 +23,7 @@ blackbox_targets:
- https://git.archlinux.org
- https://gitlab.archlinux.org
- https://ipxe.archlinux.org
- https://logging.archlinux.org
- https://lists.archlinux.org
- https://mailman.archlinux.org
- https://man.archlinux.org

View File

@ -13,6 +13,12 @@ alerting:
- localhost:9093
scrape_configs:
- job_name: loki
static_configs:
- targets: ['127.0.0.1:3100']
labels:
instance: "{{ ansible_fqdn }}"
- job_name: 'node_exporter'
static_configs:
{% for host in groups['node_exporters'] %}
@ -23,6 +29,16 @@ scrape_configs:
{% endfor %}
- job_name: 'promtail'
static_configs:
{% for host in groups['node_exporters'] %}
- targets: ['{{ host }}:9080']
labels:
instance: "{{ host }}"
{% endfor %}
- job_name: 'gitlab_runner_exporter'
static_configs:
{% for host in groups['gitlab_runners'] %}

View File

@ -0,0 +1 @@
---
# Hostname of the Loki push endpoint that promtail ships logs to.
logging_domain: 'logging.archlinux.org'

View File

@ -0,0 +1,3 @@
---
# Handler triggered via "notify: restart promtail".
- name: restart promtail
  service:
    name: promtail
    state: restarted

View File

@ -0,0 +1,16 @@
---
# Installs promtail, renders its configuration, allows the monitoring host to
# reach the promtail HTTP port and starts the service.

- name: install promtail
  pacman:
    name: promtail
    state: present

# The rendered file contains the push credentials, hence mode 0640.
- name: install promtail configuration
  template:
    src: promtail.yaml.j2
    dest: /etc/loki/promtail.yaml
    owner: root
    group: promtail
    mode: "0640"
  notify: restart promtail

- name: open promtail ipv4 port for monitoring.archlinux.org
  ansible.posix.firewalld:
    state: enabled
    permanent: true
    immediate: true
    rich_rule: "rule family=ipv4 source address={{ hostvars['monitoring.archlinux.org']['ipv4_address'] }} port protocol=tcp port=9080 accept"
  tags:
    - firewall

- name: start and enable promtail
  systemd:
    name: promtail.service
    enabled: true
    daemon_reload: true
    state: started

View File

@ -0,0 +1,28 @@
# Promtail configuration: reads the systemd journal and pushes it to the Loki
# endpoint on {{ logging_domain }} using HTTP basic auth.

server:
  # Must be reachable from the Prometheus server, which scrapes <host>:9080.
  # A loopback-only listener would make those scrapes fail; remote access is
  # limited to the monitoring host by the firewalld rule in the role's tasks.
  http_listen_address: 0.0.0.0
  http_listen_port: 9080
  grpc_listen_address: 127.0.0.1
  grpc_listen_port: 0 # 0 means random

# Where promtail records how far it has read.
positions:
  filename: /var/lib/promtail/positions.yaml

clients:
  - url: https://{{ logging_domain }}/loki/api/v1/push
    basic_auth:
      username: '{{ vault_loki_nginx_user }}'
      password: '{{ vault_loki_nginx_passwd }}'

scrape_configs:
  - job_name: journal
    journal:
      json: true
      max_age: 72h
      path: /var/log/journal
      labels:
        job: systemd-journal
    # Expose the systemd unit and the originating hostname as Loki labels.
    relabel_configs:
      - source_labels: ['__journal__systemd_unit']
        target_label: unit
      - source_labels: ["__journal__hostname"]
        target_label: instance