mirror of
https://gitlab.archlinux.org/archlinux/infrastructure.git
synced 2025-01-18 08:06:16 +01:00
The wiki has been hammered with requests from some stupid Chinese bots/crawlers. Adding a simple challenge (requiring a cookie to be set), seems to be enough to throw them off. This was initially added for all pages, but as that could affect Chinese search engines (concern raised on the forum[1]), it was changed to only affect "action views", which search engines are not supposed to crawl. [1] https://bbs.archlinux.org/viewtopic.php?pid=2185963#p2185963
170 lines
5.2 KiB
Django/Jinja
170 lines
5.2 KiB
Django/Jinja
# FastCGI response cache: on-disk store with a two-level directory layout,
# a shared-memory key zone named "wiki" (100m), and eviction of entries that
# have not been requested for 60 minutes.
fastcgi_cache_path /var/lib/nginx/cache levels=1:2 keys_zone=wiki:100m inactive=60m;
# Cache entries are distinguished by scheme, method, host and the full
# original request URI (including the query string).
fastcgi_cache_key "$scheme$request_method$host$request_uri";

# Rate-limit zones, both keyed by the client address.
#   archwikilimit: general requests, 10 r/s (the burst allowance is set where
#                  the zone is applied with limit_req).
#   api_zone:      API endpoint, 5 r/s.
limit_req_zone $binary_remote_addr zone=archwikilimit:10m rate=10r/s;
limit_req_zone $binary_remote_addr zone=api_zone:10m rate=5r/s;

# Rejected requests get "429 Too Many Requests" rather than the default status.
limit_req_status 429;
|
|
|
|
# FastCGI backend for the wiki, reached through a unix domain socket.
# Referenced by the fastcgi_pass directives as "archwiki".
upstream archwiki {
    server unix://{{ archwiki_socket }};
}
|
|
|
|
# Step 1 of the challenge decision: has the client already passed?
# $challenge_required2 is 0 only when the "challenge" cookie carries the
# configured archwiki_nginx_challenge_value; any other value (or no cookie
# at all) yields 1.
map $cookie_challenge $challenge_required2 {
    {{ archwiki_nginx_challenge_value }} 0;
    default 1;
}
|
|
|
|
# Step 2 of the challenge decision: is this an "action view"?
# Only requests whose original URI starts with "/index.php?" inherit the
# cookie verdict from $challenge_required2; everything else is never
# challenged. The match is done on $request_uri, i.e. the URI exactly as the
# client sent it, before any normalization or internal rewrite.
map $request_uri $challenge_required {
    ~^/index\.php\? $challenge_required2;
    default 0;
}
|
|
|
|
# Country lookup from the GeoLite2 country database.
# Exposes the ISO country code as $geoip2_data_country_iso_code and re-reads
# the database file at a 60 minute interval so updates are picked up.
geoip2 /var/lib/GeoIP/GeoLite2-Country.mmdb {
    $geoip2_data_country_iso_code country iso_code;
    auto_reload 60m;
}
|
|
|
|
# Step 3 of the challenge decision: restrict it by country.
# $challenge takes the value of $challenge_required for clients geolocated to
# CN and is 0 for everyone else. This has been enough to "throw off" some
# bots/crawlers from China.
map $geoip2_data_country_iso_code $challenge {
    CN $challenge_required;
    default 0;
}
|
|
|
|
# Plain-HTTP virtual host. Its only job besides whatever
# snippets/letsencrypt.conf provides (presumably ACME challenge handling;
# the snippet is not part of this file) is to send clients to HTTPS.
server {
    listen [::]:80;
    listen 80;
    server_name {{ archwiki_domain }};

    error_log /var/log/nginx/{{ archwiki_domain }}/error.log;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_reduced;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log reduced;

    include snippets/letsencrypt.conf;

    # Permanent redirect to the same path and query on HTTPS; these requests
    # are not written to the access logs.
    location / {
        return 301 https://$server_name$request_uri;
        access_log off;
    }
}
|
|
|
|
# HTTPS virtual host for the wiki. Static files are served from the "public"
# directory; PHP entry points are handed to the "archwiki" FastCGI upstream.
#
# Location selection reminder (it matters for several blocks below): nginx
# first finds the longest matching prefix location; unless that location is
# marked "^~" (or is an exact "=" match), regex locations are then tried in
# file order and the first matching regex wins over the prefix location.
server {
    listen 443 ssl;
    listen [::]:443 ssl;
    http2 on;
    server_name {{ archwiki_domain }};

    access_log /var/log/nginx/{{ archwiki_domain }}/access.log reduced;
    access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_reduced;
    error_log /var/log/nginx/{{ archwiki_domain }}/error.log;

    ssl_certificate /etc/letsencrypt/live/{{ archwiki_domain }}/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/{{ archwiki_domain }}/privkey.pem;
    ssl_trusted_certificate /etc/letsencrypt/live/{{ archwiki_domain }}/chain.pem;

    root {{ archwiki_dir }}/public;
    index index.php;

    # Block search bot that apparently never heard the term rate limiting.
    # The pattern is anchored: only user agents ENDING in "Bytespider" get 403.
    if ($http_user_agent ~ "Bytespider$" ) {
        return 403;
    }

    # robots.txt lives outside the public document root.
    location = /robots.txt {
        alias {{ archwiki_dir }}/robots.txt;
    }

    # Never serve dot-files or dot-directories. "^~" stops the regex
    # locations below from being considered for these paths.
    location ^~ /. {
        log_not_found off;
        deny all;
    }

    # Redirect old URLs to the new short-url (/title/<page>), keeping the
    # query string. The dot is escaped so that only a literal "/index.php/"
    # prefix is redirected.
    location ~ ^/index\.php/(.*)$ {
        return 301 /title/$1$is_args$args;
    }

    # Handling for the article path (pretty URLs): internally rewritten to
    # /index.php, which is then picked up by the PHP handler below.
    location ^~ /title/ {
        rewrite ^ /index.php;
    }

    # Handling for MediaWiki REST API, see https://www.mediawiki.org/wiki/API:REST_API
    location ^~ /rest.php/ {
        rewrite ^ /rest.php;
    }

    # special case for '/load.php' type URLs to cache css/js in nginx to relieve php-fpm
    location = /load.php {
        # Logged with the main / json_main formats instead of the
        # server-level reduced ones.
        access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
        access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;
        fastcgi_pass archwiki;
        fastcgi_index index.php;
        include fastcgi.conf;

        # Successful responses are kept in the "wiki" cache zone for 10 minutes.
        fastcgi_cache wiki;
        fastcgi_cache_valid 200 10m;

        # Expose HIT/MISS/... to make cache behaviour observable from the client.
        add_header X-Cache $upstream_cache_status;
    }

    # mediawiki API endpoint
    location ~ ^/api\.php {
        # 5 r/s per client (api_zone); up to 10 excess requests are queued,
        # the first 5 of them without delay.
        limit_req zone=api_zone burst=10 delay=5;
        # Only hand existing files to PHP.
        try_files $uri =404;
        access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
        access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;
        fastcgi_pass archwiki;
        fastcgi_index index.php;
        include fastcgi.conf;
    }

    # normal PHP FastCGI handler (top-level *.php files only)
    location ~ ^/[^/]+\.php$ {
        # Cookie challenge: hand out the cookie and ask the client to repeat
        # the request (see the $challenge map chain in the http context).
        #
        # NOTE(review): $request_uri already begins with "/", so the Location
        # below contains "//index.php?...". That repeated request no longer
        # matches the "^/index\.php\?" map pattern and is therefore let
        # through without the cookie being checked. Dropping the extra slash
        # would enforce the cookie, but clients that do not send it back
        # (cookies disabled, or SameSite=Strict on a navigation started from
        # another site) would then loop on this redirect - confirm the
        # intended behaviour before changing it.
        if ($challenge) {
            add_header Set-Cookie "challenge={{ archwiki_nginx_challenge_value }}; SameSite=Strict";
            return 303 $scheme://$server_name/$request_uri;
        }

        # Only hand existing files to PHP.
        try_files $uri =404;
        access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
        access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;
        fastcgi_pass archwiki;
        fastcgi_index index.php;
        include fastcgi.conf;

        # 10 r/s per client (archwikilimit); bursts of up to 10 are served
        # immediately, anything beyond that is rejected.
        limit_req zone=archwikilimit burst=10 nodelay;
    }

    # MediaWiki assets (uploads under /images/ are handled separately below)
    location ~ ^/(?:resources/(?:assets|lib|src)|(?:skins|extensions)/.+\.(?:css|js|gif|jpg|jpeg|png|svg|wasm)$) {
        expires 30d;
        add_header Pragma public;
        add_header Cache-Control "public, must-revalidate, proxy-revalidate";
    }

    # Uploaded files. This used to be a plain prefix location that was
    # shadowed by an "images" alternative in the assets regex above, so the
    # nosniff header was never sent. "^~" makes this location authoritative;
    # the caching headers that the regex location applied are repeated here
    # because add_header directives are not merged across locations.
    location ^~ /images/ {
        expires 30d;
        add_header Pragma public;
        add_header Cache-Control "public, must-revalidate, proxy-revalidate";
        # Add the nosniff header to the images folder (required for mw 1.40+)
        add_header X-Content-Type-Options nosniff;
    }

    # Deny access to deleted images folder. "^~" is required: without it a
    # matching regex location takes precedence and the deny never applies.
    location ^~ /images/deleted {
        deny all;
    }

    # block all other directories
    location ~ ^/[^/]+/ {
        log_not_found off;
        deny all;
    }
}
|