Mirror of https://gitlab.archlinux.org/archlinux/infrastructure.git
We have used MediaWiki's file cache[2] until now, but recently the wiki has been hammered with requests from some stupid Chinese bots/crawlers. Caching at the web server level[1] is faster, as we avoid the PHP overhead, and it seems to make a difference (performance-wise), especially when the bots/crawlers are hitting us. This is usually done with Varnish[3], but I went with a simple Python service (30 LOC) for handling the PURGE requests, as that is much simpler than adding Varnish to our stack.

[1] https://www.mediawiki.org/w/index.php?title=Manual:Performance_tuning&oldid=6670283#Page_view_caching
[2] https://www.mediawiki.org/wiki/Manual:File_cache
[3] https://www.mediawiki.org/wiki/Manual:Varnish_caching

Fix #315
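For illustration, a purge against this service could be issued as in the sketch below. This is only a sketch: it assumes the service (shown in the file that follows) is listening on 127.0.0.1:1080, and that the PURGE request line carries the full page URL so the handler can rebuild the same host/path cache key that nginx uses; the wiki URL is a placeholder.

import http.client

# Hypothetical purge of one wiki page; in production it would presumably be
# MediaWiki itself sending these PURGE requests.
conn = http.client.HTTPConnection("127.0.0.1", 1080)
conn.request("PURGE", "https://wiki.archlinux.org/title/Main_page")
print(conn.getresponse().status)  # 200 acknowledges the purge
conn.close()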
31 lines
1.0 KiB
Python
#!/usr/bin/env python
import hashlib
import http.server
import pathlib
import socketserver
import urllib.parse

socketserver.ThreadingTCPServer.allow_reuse_address = True


class Handler(http.server.BaseHTTPRequestHandler):
    def do_PURGE(self):
        self.send_response(http.HTTPStatus.OK)
        self.end_headers()

        o = urllib.parse.urlparse(self.path)
        for method in ["GET", "HEAD"]:
            # Please keep in sync with "fastcgi_cache_key" in nginx.d.conf.j2
            if o.query:
                cache_key = f"https{method}{o.netloc}{o.path}?{o.query}"
            else:
                cache_key = f"https{method}{o.netloc}{o.path}"
            hash = hashlib.md5(cache_key.encode("utf-8")).hexdigest()
            # Please keep in sync with "fastcgi_cache_path" in nginx.d.conf.j2
            pathlib.Path(
                f"/var/lib/nginx/cache/{hash[-1]}/{hash[-3:-1]}/{hash}"
            ).unlink(missing_ok=True)


httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 1080), Handler)
httpd.serve_forever()
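For reference, the key-to-path mapping in do_PURGE can be traced by hand. The sketch below walks one hypothetical URL through the same steps as the handler above: build the cache key for the "GET" variant, hash it with MD5, and split the digest into the two directory levels of nginx's cache layout (presumably a "levels=1:2" setting on the fastcgi_cache_path referenced in the comment above). The key and resulting path are illustrative only.

import hashlib

# Same key format as do_PURGE above, for the "GET" variant of one page;
# the "HEAD" variant gets its own key and therefore its own cache file.
cache_key = "httpsGETwiki.archlinux.org/title/Main_page"
digest = hashlib.md5(cache_key.encode("utf-8")).hexdigest()

# Last hex character, then the two characters before it, become the
# directory levels under the cache root.
print(f"/var/lib/nginx/cache/{digest[-1]}/{digest[-3:-1]}/{digest}")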