dotfiles/.local/share/qutebrowser/userscripts/qute-gemini

341 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
# qute-gemini - Open Gemini links in qutebrowser and render them as HTML
#
# SPDX-FileCopyrightText: 2019-2020 solderpunk
# SPDX-FileCopyrightText: 2020 Aaron Janse
# SPDX-FileCopyrightText: 2020 petedussin
# SPDX-FileCopyrightText: 2020-2021 Sotiris Papatheodorou
# SPDX-License-Identifier: GPL-3.0-or-later
import cgi
import html
import os
import socket
import ssl
import sys
import tempfile
import urllib.parse
from typing import Tuple
_version = "1.0.0"
_max_redirects = 5
_error_page_template = '''<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>Error opening page: URL</title>
<style>
CSS
</style>
</head>
<body>
<h1>qute-gemini error</h1>
<p>Error while opening:<br/><a href="URL">URL_TEXT</a></p>
<p>DESCRIPTION</p>
</body>
</html>
'''
_status_code_desc = {
"1": "Gemini status code 1 Input. This is not implemented in qute-gemini.",
"10": "Gemini status code 10 Input. This is not implemented in qute-gemini.",
"11": "Gemini status code 11 Sensitive Input. This is not implemented in qute-gemini.",
"3": "Gemini status code 3 Redirect. Stopped after " + str(_max_redirects) + " redirects.",
"30": "Gemini status code 30 Temporary Redirect. Stopped after " + str(_max_redirects) + " redirects.",
"31": "Gemini status code 31 Permanent Redirect. Stopped after " + str(_max_redirects) + " redirects.",
"4": "Gemini status code 4 Temporary Failure. Server message: META",
"40": "Gemini status code 40 Temporary Failure. Server message: META",
"41": "Gemini status code 41 Server Unavailable. The server is unavailable due to overload or maintenance. Server message: META",
"42": "Gemini status code 42 CGI Error. A CGI process, or similar system for generating dynamic content, died unexpectedly or timed out. Server message: META",
"43": "Gemini status code 43 Proxy Error. A proxy request failed because the server was unable to successfully complete a transaction with the remote host. Server message: META",
"44": "Gemini status code 44 Slow Down. Rate limiting is in effect. Please wait META seconds before making another request to this server.",
"5": "Gemini status code 5 Permanent Failure. Server message: META",
"50": "Gemini status code 50 Permanent Failure. Server message: META",
"51": "Gemini status code 51 Not Found. he requested resource could not be found but may be available in the future. Server message: META",
"52": "Gemini status code 52 Gone. The resource requested is no longer available and will not be available again. Server message: META",
"53": "Gemini status code 53 Proxy Request Refused. The request was for a resource at a domain not served by the server and the server does not accept proxy requests. Server message: META",
"59": "Gemini status code 59 Bad Request. The server was unable to parse the client's request, presumably due to a malformed request. Server message: META",
"6": "Gemini status code 6 Client Certificate Required. This is not implemented in qute-gemini.",
}
def qute_url() -> str:
"""Get the URL passed to the script by qutebrowser."""
return os.environ["QUTE_URL"]
def qute_fifo() -> str:
"""Get the FIFO or file to write qutebrowser commands to."""
return os.environ["QUTE_FIFO"]
def html_href(url: str, description: str) -> str:
return "".join(['<a href="', url, '">', description, "</a>"])
def qute_gemini_css_path() -> str:
"""Return the path where the custom CSS file is expected to be."""
try:
base_dir = os.environ["XDG_DATA_HOME"]
except KeyError:
base_dir = os.path.join(os.environ["HOME"], ".local/share")
return os.path.join(base_dir, "qutebrowser/userscripts/qute-gemini.css")
def gemini_absolutise_url(base_url: str, relative_url: str) -> str:
"""Absolutise relative gemini URLs.
Adapted from gcat: https://github.com/aaronjanse/gcat
"""
if "://" not in relative_url:
# Python's URL tools somehow only work with known schemes?
base_url = base_url.replace("gemini://", "http://")
relative_url = urllib.parse.urljoin(base_url, relative_url)
relative_url = relative_url.replace("http://", "gemini://")
return relative_url
def gemini_fetch_url(url: str) -> Tuple[str, str, str, str, str]:
"""Fetch a Gemini URL and return the content as a string.
url: URL with gemini:// or no scheme.
Returns 4 strings: the content, the URL the content was fetched from, the
Gemini status code, the value of the meta field and an error message.
Adapted from gcat: https://github.com/aaronjanse/gcat
"""
# Parse the URL to get the hostname and port
parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
url = "gemini://" + url
parsed_url = urllib.parse.urlparse(url)
if parsed_url.scheme != "gemini":
return "", "Received non-gemini:// URL: " + url
if parsed_url.port is not None:
useport = parsed_url.port
else:
useport = 1965
# Do the Gemini transaction, looping for redirects
redirects = 0
while True:
# Send the request
s = socket.create_connection((parsed_url.hostname, useport))
context = ssl.SSLContext(ssl.PROTOCOL_TLS)
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE
s = context.wrap_socket(s, server_hostname = parsed_url.netloc)
s.sendall((url + "\r\n").encode("UTF-8"))
# Get the status code and meta
fp = s.makefile("rb")
header = fp.readline().decode("UTF-8").strip()
status, meta = header.split()[:2]
# Follow up to 5 redirects
if status.startswith("3"):
url = gemini_absolutise_url(url, meta)
parsed_url = urllib.parse.urlparse(url)
redirects += 1
if redirects > _max_redirects:
# Too many redirects
break
# Otherwise we're done
else:
break
# Process the response
content = ""
error_msg = ""
# 2x Success
if status.startswith("2"):
media_type, media_type_opts = cgi.parse_header(meta)
# Decode according to declared charset defaulting to UTF-8
if meta.startswith("text/gemini"):
charset = media_type_opts.get("charset", "UTF-8")
content = fp.read().decode(charset)
else:
error_msg = "Expected media type text/gemini but received " \
+ media_type
# Handle errors
else:
# Try matching a 2-digit and then a 1-digit status code
try:
error_msg = _status_code_desc[status[0:2]]
except KeyError:
try:
error_msg = _status_code_desc[status[0]]
except KeyError:
error_msg = "The server sent back something weird."
# Substitute the contents of meta into the error message if needed
error_msg = error_msg.replace("META", meta)
return content, url, status, meta, error_msg
def gemtext_to_html(gemtext: str, url: str, original_url: str,
status: str, meta: str) -> str:
"""Convert gemtext to HTML.
title: Used as the document title.
url: The URL the gemtext was received from. Used to resolve
relative URLs in the gemtext content.
original_url: The URL the original request was made at.
status: The Gemini status code returned by the server.
meta: The meta returned by the server.
Returns the HTML representation as a string.
"""
# Accumulate converted gemtext lines
lines = ['<?xml version="1.0" encoding="UTF-8"?>',
'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">',
"\t<head>",
"\t\t<title>" + html.escape(url) + "</title>",
"\t\t<style>",
get_css(),
"\t\t</style>",
"\t</head>",
"\t<body>",
"\t<article>"]
in_pre = False
in_list = False
# Add an extra newline to ensure list tags are closed properly
for line in (gemtext + "\n").splitlines():
# Add the list closing tag
if not line.startswith("*") and in_list:
lines.append("\t\t</ul>")
in_list = False
# Blank line, ignore
if not line:
pass
# Link
elif line.startswith("=>"):
l = line[2:].split(None, 1)
# Use the URL itself as the description if there is none
if len(l) == 1:
l.append(l[0])
# Encode the link description
l[1] = html.escape(l[1])
# Resolve relative URLs
l[0] = gemini_absolutise_url(url, l[0])
lines.append("\t\t<p>" + html_href(l[0], l[1]) + "</p>")
# Preformated toggle
elif line.startswith("```"):
if in_pre:
lines.append("\t\t</pre>")
else:
lines.append("\t\t<pre>")
in_pre = not in_pre
# Preformated
elif in_pre:
lines.append(line)
# Header
elif line.startswith("###"):
lines.append("\t\t<h3>" + html.escape(line[3:].strip()) + "</h3>")
elif line.startswith("##"):
lines.append("\t\t<h2>" + html.escape(line[2:].strip()) + "</h2>")
elif line.startswith("#"):
lines.append("\t\t<h1>" + html.escape(line[1:].strip()) + "</h1>")
# List
elif line.startswith("*"):
if not in_list:
lines.append("\t\t<ul>")
in_list = True
lines.append("\t\t\t<li>" + html.escape(line[1:].strip()) + "</li>")
# Quote
elif line.startswith(">"):
lines.extend(["\t\t<blockquote>",
"\t\t\t<p>" + line[1:].strip() + "</p>",
"\t\t</blockquote>"])
# Normal text
else:
lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
url_html = html_href(url, html.escape(url))
original_url_html = html_href(original_url, html.escape(original_url))
lines.extend(["",
"\t</article>",
"\t<details>",
"\t\t<summary>",
"\t\t\tContent from " + url_html,
"\t\t</summary>",
"\t\t<dl>",
"\t\t\t<dt>Original URL</dt>",
"\t\t\t<dd>" + original_url_html + "</dd>",
"\t\t\t<dt>Status</dt>",
"\t\t\t<dd>" + status + "</dd>",
"\t\t\t<dt>Meta</dt>",
"\t\t\t<dd>" + meta + "</dd>",
"\t\t\t<dt>Fetched by</dt>",
'\t\t\t<dd><a href="https://git.sr.ht/~sotirisp/qute-gemini">qute-gemini ' + str(_version) + "</a></dd>",
"\t\t</dl>",
"\t</details>",
"\t</body>",
"</html>"])
return "\n".join(lines)
def get_css() -> str:
# Search for qute-gemini.css in the directory this script is located in
css_file = qute_gemini_css_path()
if os.path.isfile(css_file):
# Return the file contents
with open(css_file, "r") as f:
return f.read().strip()
else:
# Use no CSS
return ""
def qute_error_page(url: str, description: str) -> str:
"""Return a data URI error page like qutebrowser does.
url: The URL of the page that failed to load.
description: A description of the error.
Returns a data URI containing the error page.
"""
# Generate the HTML error page
html_page = _error_page_template.replace("URL", url)
html_page = html_page.replace("URL_TEXT", html.escape(url))
html_page = html_page.replace("DESCRIPTION", html.escape(description))
html_page = html_page.replace("CSS", get_css())
# URL encode and return as a data URI
return "data:text/html;charset=UTF-8," + urllib.parse.quote(html_page)
def open_gemini(url: str, open_args: str) -> None:
"""Open Gemini URL in qutebrowser."""
# Get the Gemini content
content, content_url, status, meta, error_msg = gemini_fetch_url(url)
if error_msg:
# Generate an error page in a data URI
open_url = qute_error_page(url, error_msg)
else:
# Success, convert to HTML in a temporary file
tmpf = tempfile.NamedTemporaryFile("w", suffix=".html", delete=False)
tmp_filename = tmpf.name
tmpf.close()
with open(tmp_filename, "w") as f:
f.write(gemtext_to_html(content, content_url, url, status, meta))
open_url = " file://" + tmp_filename
# Open the HTML file in qutebrowser
with open(qute_fifo(), "w") as qfifo:
qfifo.write("open " + open_args + open_url)
def open_other(url: str, open_args: str) -> None:
"""Open non-Gemini URL in qutebrowser."""
with open(qute_fifo(), "w") as qfifo:
qfifo.write("open " + open_args + " " + url)
if __name__ == "__main__":
# Open in the current or a new tab depending on the script name
if sys.argv[0].endswith("-tab"):
open_args = "-t"
else:
open_args = ""
# Select how to open the URL depending on its scheme
url = qute_url()
parsed_url = urllib.parse.urlparse(url)
if parsed_url.scheme == "gemini":
open_gemini(url, open_args)
else:
open_other(url, open_args)