1
1
mirror of https://gitlab.archlinux.org/archlinux/infrastructure.git synced 2025-01-18 08:06:16 +01:00
infrastructure/roles/archwiki/templates/nginx.d.conf.j2
Kristian Klausen 9aefd87258
archwiki: Add simple challenge for Chinese IP addresses
The wiki has been hammered with requests from some stupid Chinese
bots/crawlers. Adding a simple challenge (requiring a cookie to be set),
seems to be enough to throw them off.

This was initially added for all pages, but as that could affect Chinese
search engines (concern raised on the forum[1]), it was changed to only
affect "action views", which search engines are not supposed to crawl.

[1] https://bbs.archlinux.org/viewtopic.php?pid=2185963#p2185963
2024-07-31 21:28:49 +02:00

170 lines
5.2 KiB
Django/Jinja

# FastCGI response cache: stored on disk below /var/lib/nginx/cache in a
# two-level directory layout, with a 100 MB shared-memory key zone named
# "wiki". Entries that are not requested for 60 minutes are dropped.
fastcgi_cache_path /var/lib/nginx/cache levels=1:2 keys_zone=wiki:100m inactive=60m;
# Cache entries are keyed by scheme, request method, host and the original
# request URI (including the query string).
fastcgi_cache_key "$scheme$request_method$host$request_uri";

# Per-client-address rate limit for the API endpoint: 5 requests per second.
limit_req_zone $binary_remote_addr zone=api_zone:10m rate=5r/s;
# Per-client-address rate limit for general requests: 10 requests per second,
# meant to blunt DoS attempts (the burst size is set where the zone is used).
limit_req_zone $binary_remote_addr zone=archwikilimit:10m rate=10r/s;
# Answer rate-limited requests with "429 Too Many Requests".
limit_req_status 429;

# FastCGI backend of the wiki, reached through a unix domain socket.
upstream archwiki {
    server unix://{{ archwiki_socket }};
}
# Step 1 (cookie): $challenge_required2 is 0 only when the "challenge" cookie
# carries the value of "archwiki_nginx_challenge_value"; any other value, or
# no cookie at all, yields 1.
map $cookie_challenge $challenge_required2 {
    {{ archwiki_nginx_challenge_value }} 0;
    default 1;
}

# Step 2 (URI): only "action views" (original request URI starting with
# "/index.php?") are eligible for the challenge; for them the result of the
# cookie check is passed on, every other URI yields 0.
map $request_uri $challenge_required {
    ~^/index\.php\? $challenge_required2;
    default 0;
}

# Look up the ISO country code of the client in the GeoLite2 country database.
# auto_reload makes the module re-check the database file every 60 minutes.
geoip2 /var/lib/GeoIP/GeoLite2-Country.mmdb {
    $geoip2_data_country_iso_code country iso_code;
    auto_reload 60m;
}

# Step 3 (country): $challenge is only ever true for clients located in China
# (CN), and then only if the URI/cookie checks above ask for a challenge.
# This is enough to "throw off" some bots/crawlers from China.
map $geoip2_data_country_iso_code $challenge {
    CN $challenge_required;
    default 0;
}
# Plain HTTP virtual host: its only job besides the included Let's Encrypt
# snippet is to send clients to the HTTPS site.
server {
    server_name {{ archwiki_domain }};
    listen 80;
    listen [::]:80;

    # "reduced" / "json_reduced" are log formats defined outside this file.
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log reduced;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_reduced;
    error_log /var/log/nginx/{{ archwiki_domain }}/error.log;

    # NOTE(review): presumably serves the ACME challenge files - confirm
    # against the snippet itself.
    include snippets/letsencrypt.conf;

    # Permanently redirect everything else to HTTPS, keeping path and query
    # string; these redirects are not written to the access log.
    location / {
        access_log off;
        return 301 https://$server_name$request_uri;
    }
}
server {
# HTTPS virtual host of the wiki (IPv4 and IPv6), with HTTP/2 enabled.
server_name {{ archwiki_domain }};
listen 443 ssl;
listen [::]:443 ssl;
http2 on;

# TLS material issued by Let's Encrypt for this domain.
ssl_certificate /etc/letsencrypt/live/{{ archwiki_domain }}/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/{{ archwiki_domain }}/privkey.pem;
ssl_trusted_certificate /etc/letsencrypt/live/{{ archwiki_domain }}/chain.pem;

# Default logging; "reduced" / "json_reduced" are log formats defined outside
# this file. Several locations below override the access logs.
access_log /var/log/nginx/{{ archwiki_domain }}/access.log reduced;
access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_reduced;
error_log /var/log/nginx/{{ archwiki_domain }}/error.log;

# Files are served from the "public" directory of the wiki checkout.
root {{ archwiki_dir }}/public;
index index.php;

# Block a search bot that apparently never heard of rate limiting: any
# User-Agent ending in "Bytespider" is rejected with 403.
if ($http_user_agent ~ "Bytespider$") {
    return 403;
}
# robots.txt lives in the wiki directory itself, outside the "public" root,
# so the exact-match location maps it there with an alias.
location = /robots.txt {
    alias {{ archwiki_dir }}/robots.txt;
}
# Refuse access to anything whose path starts with "/." (hidden files and
# directories). "^~" stops nginx from consulting the regex locations for
# such requests, and missing files are not reported in the error log.
location ^~ /. {
    deny all;
    log_not_found off;
}
# Redirect old URLs (/index.php/<page>) to the new short-url (/title/<page>),
# preserving the query string if there is one.
# The dot is escaped so that only a literal "/index.php/" prefix matches; an
# unescaped "." would match any character in that position.
location ~ ^/index\.php/(.*)$ {
    return 301 /title/$1$is_args$args;
}
# Pretty URLs: every request below /title/ is internally rewritten to
# /index.php. "^~" keeps the regex locations from being tried for the
# /title/ URI itself.
location ^~ /title/ {
    rewrite ^ /index.php;
}
# MediaWiki REST API (see https://www.mediawiki.org/wiki/API:REST_API):
# every request below /rest.php/ is internally rewritten to /rest.php.
location ^~ /rest.php/ {
    rewrite ^ /rest.php;
}
# Special case for '/load.php' type URLs: css/js responses are cached in nginx
# to relieve php-fpm.
location = /load.php {
    # Hand the request to the wiki's FastCGI backend.
    include fastcgi.conf;
    fastcgi_index index.php;
    fastcgi_pass archwiki;

    # Keep successful (200) responses in the "wiki" cache zone for 10 minutes
    # and expose the cache status (HIT, MISS, ...) in a response header.
    fastcgi_cache wiki;
    fastcgi_cache_valid 200 10m;
    add_header X-Cache $upstream_cache_status;

    # Log with the "main" / "json_main" formats instead of the server default.
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;
}
# MediaWiki API endpoint (any URI starting with /api.php).
location ~ ^/api\.php {
    # Rate limit from the "api_zone" zone: up to 10 excess requests are
    # accepted as a burst, and delaying starts after the first 5 of them.
    limit_req zone=api_zone burst=10 delay=5;

    # Answer 404 directly when the requested file does not exist instead of
    # passing the request on to FastCGI.
    try_files $uri =404;

    # Hand the request to the wiki's FastCGI backend.
    include fastcgi.conf;
    fastcgi_index index.php;
    fastcgi_pass archwiki;

    # Log with the "main" / "json_main" formats instead of the server default.
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;
}
# normal PHP FastCGI handler (any top-level *.php script)
location ~ ^/[^/]+\.php$ {
    # Cookie challenge: when $challenge is true, hand out the challenge cookie
    # and make the client repeat the very same request. A client that keeps
    # the cookie passes on the next attempt.
    if ($challenge) {
        add_header Set-Cookie "challenge={{ archwiki_nginx_challenge_value }}; SameSite=Strict";
        # $request_uri already starts with "/", so no extra slash is added
        # here. A redirect to "//index.php?..." would no longer match the
        # "^/index\.php\?" challenge map, and the follow-up request would be
        # let through without the cookie ever being checked.
        return 303 $scheme://$server_name$request_uri;
    }

    # Answer 404 directly when the script does not exist instead of passing
    # the request on to FastCGI.
    try_files $uri =404;

    # Log with the "main" / "json_main" formats instead of the server default.
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;

    # Hand the request to the wiki's FastCGI backend.
    fastcgi_pass archwiki;
    fastcgi_index index.php;
    include fastcgi.conf;

    # General rate limit: bursts of up to 10 excess requests are served
    # immediately, anything beyond that is rejected.
    limit_req zone=archwikilimit burst=10 nodelay;
}
# MediaWiki assets (resources, plus static files of skins and extensions):
# let clients cache them for 30 days.
# /images is deliberately NOT part of this regex: a regex location takes
# precedence over a plain prefix location, so listing it here would shadow
# the /images/ locations below (including the deny rule for deleted files).
location ~ ^/(?:resources/(?:assets|lib|src)|(?:skins|extensions)/.+\.(?:css|js|gif|jpg|jpeg|png|svg|wasm)$) {
    expires 30d;
    add_header Pragma public;
    add_header Cache-Control "public, must-revalidate, proxy-revalidate";
}
# Uploaded images. "^~" makes this prefix location final, so no regex
# location can override it.
location ^~ /images/ {
    # Same caching policy as the other assets. The headers are repeated here
    # because add_header directives are only inherited when a location
    # defines none of its own.
    expires 30d;
    add_header Pragma public;
    add_header Cache-Control "public, must-revalidate, proxy-revalidate";
    # Add the nosniff header to the images folder (required for mw 1.40+)
    add_header X-Content-Type-Options nosniff;
}
# Deny access to deleted images folder. This is the longest matching prefix
# for such requests and, thanks to "^~", it cannot be bypassed through a
# regex location.
location ^~ /images/deleted {
    deny all;
}
# Catch-all for any remaining path that lies inside a top-level directory:
# access is denied, and missing files are not reported in the error log.
location ~ ^/[^/]+/ {
    deny all;
    log_not_found off;
}
}