341 lines
13 KiB
Python
Executable File
341 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# qute-gemini - Open Gemini links in qutebrowser and render them as HTML
|
|
#
|
|
# SPDX-FileCopyrightText: 2019-2020 solderpunk
|
|
# SPDX-FileCopyrightText: 2020 Aaron Janse
|
|
# SPDX-FileCopyrightText: 2020 petedussin
|
|
# SPDX-FileCopyrightText: 2020-2021 Sotiris Papatheodorou
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
import cgi
|
|
import html
|
|
import os
|
|
import socket
|
|
import ssl
|
|
import sys
|
|
import tempfile
|
|
import urllib.parse
|
|
|
|
from typing import Tuple
|
|
|
|
|
|
# Version of qute-gemini, shown in the footer of every rendered page.
_version = "1.0.0"

# Number of redirects that are followed before giving up.
_max_redirects = 5

# Skeleton of the error page. The upper-case words URL, URL_TEXT, CSS and
# DESCRIPTION are placeholders that qute_error_page() fills in.
_error_page_template = '''<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>Error opening page: URL</title>
<style>
CSS
</style>
</head>
<body>
<h1>qute-gemini error</h1>
<p>Error while opening:<br/><a href="URL">URL_TEXT</a></p>
<p>DESCRIPTION</p>
</body>
</html>
'''

# Error descriptions keyed by Gemini status code. Two-digit codes are looked
# up first, the one-digit entries act as the fallback for their whole class.
# The word META is a placeholder for the meta field sent by the server.
_status_code_desc = {
    "1": "Gemini status code 1 Input. This is not implemented in qute-gemini.",
    "10": "Gemini status code 10 Input. This is not implemented in qute-gemini.",
    "11": "Gemini status code 11 Sensitive Input. This is not implemented in qute-gemini.",
    "3": "Gemini status code 3 Redirect. Stopped after " + str(_max_redirects) + " redirects.",
    "30": "Gemini status code 30 Temporary Redirect. Stopped after " + str(_max_redirects) + " redirects.",
    "31": "Gemini status code 31 Permanent Redirect. Stopped after " + str(_max_redirects) + " redirects.",
    "4": "Gemini status code 4 Temporary Failure. Server message: META",
    "40": "Gemini status code 40 Temporary Failure. Server message: META",
    "41": "Gemini status code 41 Server Unavailable. The server is unavailable due to overload or maintenance. Server message: META",
    "42": "Gemini status code 42 CGI Error. A CGI process, or similar system for generating dynamic content, died unexpectedly or timed out. Server message: META",
    "43": "Gemini status code 43 Proxy Error. A proxy request failed because the server was unable to successfully complete a transaction with the remote host. Server message: META",
    "44": "Gemini status code 44 Slow Down. Rate limiting is in effect. Please wait META seconds before making another request to this server.",
    "5": "Gemini status code 5 Permanent Failure. Server message: META",
    "50": "Gemini status code 50 Permanent Failure. Server message: META",
    # Fixed typo: the sentence used to start with "he requested resource"
    "51": "Gemini status code 51 Not Found. The requested resource could not be found but may be available in the future. Server message: META",
    "52": "Gemini status code 52 Gone. The resource requested is no longer available and will not be available again. Server message: META",
    "53": "Gemini status code 53 Proxy Request Refused. The request was for a resource at a domain not served by the server and the server does not accept proxy requests. Server message: META",
    "59": "Gemini status code 59 Bad Request. The server was unable to parse the client's request, presumably due to a malformed request. Server message: META",
    "6": "Gemini status code 6 Client Certificate Required. This is not implemented in qute-gemini.",
}
|
|
|
|
|
|
def qute_url() -> str:
    """Return the URL qutebrowser handed to this userscript.

    The value is read from the QUTE_URL environment variable. A KeyError
    is raised if the variable is not set.
    """
    current_url = os.environ["QUTE_URL"]
    return current_url
|
|
|
|
|
|
def qute_fifo() -> str:
    """Return the path of the FIFO or file that takes qutebrowser commands.

    The value is read from the QUTE_FIFO environment variable. A KeyError
    is raised if the variable is not set.
    """
    fifo_path = os.environ["QUTE_FIFO"]
    return fifo_path
|
|
|
|
|
|
def html_href(url: str, description: str) -> str:
    """Return an HTML anchor element that links to url.

    url: The link target. It is HTML-escaped here so that characters such
         as quotes, ampersands and angle brackets cannot terminate the
         href attribute or inject markup.
    description: The link text. It is inserted verbatim, so the caller
                 must pass text that is already HTML-escaped.
    Returns the anchor element as a string.
    """
    return '<a href="' + html.escape(url, quote=True) + '">' + description + "</a>"
|
|
|
|
|
|
def qute_gemini_css_path() -> str:
    """Return the path where the custom CSS file is expected to be.

    The file lives under $XDG_DATA_HOME. When that variable is unset or
    empty the default ~/.local/share is used instead.
    """
    base_dir = os.environ.get("XDG_DATA_HOME")
    if not base_dir:
        # expanduser() honours $HOME but does not fail when it is unset
        base_dir = os.path.join(os.path.expanduser("~"), ".local/share")
    return os.path.join(base_dir, "qutebrowser/userscripts/qute-gemini.css")
|
|
|
|
|
|
def gemini_absolutise_url(base_url: str, relative_url: str) -> str:
    """Absolutise relative gemini URLs.

    base_url: The URL of the document the reference was found in.
    relative_url: The reference to resolve. A reference that already
                  starts with "<scheme>://" is returned unchanged.
    Returns the absolute URL as a string.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    scheme_chars = letters + "0123456789+-."
    # Only a well-formed scheme at the very start makes the reference
    # absolute. A "://" further in, e.g. inside a query string such as
    # "search?u=http://example.org", must not prevent resolution.
    scheme, separator, _ = relative_url.partition("://")
    if (separator and scheme and scheme[0] in letters
            and all(char in scheme_chars for char in scheme)):
        return relative_url
    # urljoin() only resolves references for schemes it knows about, so the
    # base is presented as http:// and the result is converted back. Only
    # the leading scheme is swapped so that URLs embedded in the path or
    # query are left untouched.
    gemini_prefix = "gemini://"
    http_prefix = "http://"
    swapped = base_url.startswith(gemini_prefix)
    if swapped:
        base_url = http_prefix + base_url[len(gemini_prefix):]
    absolute_url = urllib.parse.urljoin(base_url, relative_url)
    if swapped and absolute_url.startswith(http_prefix):
        absolute_url = gemini_prefix + absolute_url[len(http_prefix):]
    return absolute_url
|
|
|
|
|
|
def _parse_media_type(meta: str) -> Tuple[str, dict]:
    """Split a media type string into its type and its parameters.

    meta: A media type such as "text/gemini; charset=utf-8".
    Returns the lower-cased media type and a dict that maps lower-cased
    parameter names to their values with surrounding quotes removed.
    """
    # Parsed by hand because the cgi module (cgi.parse_header) is deprecated
    # and no longer part of the standard library in recent Python versions
    media_type, _, raw_params = meta.partition(";")
    params = {}
    for raw_param in raw_params.split(";"):
        name, separator, value = raw_param.partition("=")
        if separator:
            params[name.strip().lower()] = value.strip().strip('"')
    return media_type.strip().lower(), params


def gemini_fetch_url(url: str) -> Tuple[str, str, str, str, str]:
    """Fetch a Gemini URL and return the content as a string.

    url: URL with gemini:// or no scheme.
    Returns 5 strings: the content, the URL the content was fetched from,
    the Gemini status code, the value of the meta field and an error
    message. The error message is empty on success. On failure the content
    is empty and the error message describes what went wrong; network and
    TLS errors are reported this way too instead of being raised.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    # Assume gemini:// when the URL has no scheme
    if not urllib.parse.urlparse(url).scheme:
        url = "gemini://" + url
    # NOTE: server certificates are deliberately not verified, as in the
    # original implementation. check_hostname has to be switched off before
    # verify_mode can be set to CERT_NONE.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    # Do the Gemini transaction, looping for redirects
    status = ""
    meta = ""
    body = b""
    redirects = 0
    while True:
        # Parse the URL again on every pass so that the host AND the port of
        # a redirect target are honoured
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.scheme != "gemini":
            return "", url, status, meta, "Received non-gemini:// URL: " + url
        if not parsed_url.hostname:
            return "", url, status, meta, "No host name in URL: " + url
        try:
            port = parsed_url.port
            if port is None:
                port = 1965
            # The context managers close the connection on every path
            with socket.create_connection((parsed_url.hostname, port)) as sock:
                # SNI needs the bare host name; netloc may include the port
                with context.wrap_socket(
                        sock, server_hostname=parsed_url.hostname) as tls_sock:
                    # Send the request
                    tls_sock.sendall((url + "\r\n").encode("UTF-8"))
                    with tls_sock.makefile("rb") as fp:
                        header = fp.readline().decode("UTF-8").strip()
                        # The body has to be read before the socket closes
                        if header.startswith("2"):
                            body = fp.read()
        except (OSError, ValueError) as err:
            # OSError covers socket and TLS failures, ValueError covers an
            # invalid port and a header that is not valid UTF-8
            return "", url, "", "", "Could not fetch " + url + ": " + str(err)
        # Get the status code and meta. Meta is everything after the first
        # run of whitespace and may itself contain spaces.
        header_fields = header.split(None, 1)
        status = header_fields[0] if header_fields else ""
        meta = header_fields[1] if len(header_fields) > 1 else ""
        # Anything but a redirect ends the loop
        if not status.startswith("3"):
            break
        url = gemini_absolutise_url(url, meta)
        redirects += 1
        if redirects > _max_redirects:
            # Too many redirects
            break
    # Process the response
    content = ""
    error_msg = ""
    # 2x Success
    if status.startswith("2"):
        # A missing media type is treated as text/gemini
        media_type, media_type_opts = _parse_media_type(meta or "text/gemini")
        if media_type == "text/gemini":
            # Decode according to declared charset defaulting to UTF-8
            charset = media_type_opts.get("charset", "UTF-8")
            try:
                content = body.decode(charset)
            except (LookupError, UnicodeDecodeError) as err:
                error_msg = "Could not decode the response: " + str(err)
        else:
            error_msg = "Expected media type text/gemini but received " \
                + media_type
    # Handle errors
    else:
        # Try matching a 2-digit and then a 1-digit status code
        error_msg = (_status_code_desc.get(status[:2])
                     or _status_code_desc.get(status[:1])
                     or "The server sent back something weird.")
        # Substitute the contents of meta into the error message if needed
        error_msg = error_msg.replace("META", meta)
    return content, url, status, meta, error_msg
|
|
|
|
|
|
def gemtext_to_html(gemtext: str, url: str, original_url: str,
                    status: str, meta: str) -> str:
    """Convert gemtext to HTML.

    gemtext: The gemtext document to convert.
    url: The URL the gemtext was received from. Used as the document
         title and to resolve relative URLs in the gemtext content.
    original_url: The URL the original request was made at.
    status: The Gemini status code returned by the server.
    meta: The meta returned by the server.
    Returns the HTML representation as a string.
    """
    # Accumulate converted gemtext lines
    lines = ['<?xml version="1.0" encoding="UTF-8"?>',
             '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">',
             "\t<head>",
             "\t\t<title>" + html.escape(url) + "</title>",
             "\t\t<style>",
             get_css(),
             "\t\t</style>",
             "\t</head>",
             "\t<body>",
             "\t<article>"]
    in_pre = False
    in_list = False
    for line in gemtext.splitlines():
        # A list item is "* " followed by text; inside a preformatted block
        # nothing is interpreted as markup
        is_list_item = not in_pre and line.startswith("* ")
        # Add the list closing tag
        if in_list and not is_list_item:
            lines.append("\t\t</ul>")
            in_list = False
        # Preformatted toggle
        if line.startswith("```"):
            lines.append("\t\t</pre>" if in_pre else "\t\t<pre>")
            in_pre = not in_pre
        # Preformatted: checked before every other line type so that blank
        # lines and lines that look like links or headers are kept verbatim
        elif in_pre:
            lines.append(html.escape(line))
        # Blank line, ignore
        elif not line:
            pass
        # Link
        elif line.startswith("=>"):
            link = line[2:].split(None, 1)
            if link:
                # Resolve relative URLs
                target = gemini_absolutise_url(url, link[0])
                # Use the URL itself as the description if there is none
                label = html.escape(link[-1])
                lines.append("\t\t<p>" + html_href(target, label) + "</p>")
            else:
                # "=>" without a URL is not a link, show it as plain text
                lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
        # Header
        elif line.startswith("###"):
            lines.append("\t\t<h3>" + html.escape(line[3:].strip()) + "</h3>")
        elif line.startswith("##"):
            lines.append("\t\t<h2>" + html.escape(line[2:].strip()) + "</h2>")
        elif line.startswith("#"):
            lines.append("\t\t<h1>" + html.escape(line[1:].strip()) + "</h1>")
        # List
        elif is_list_item:
            if not in_list:
                lines.append("\t\t<ul>")
                in_list = True
            lines.append("\t\t\t<li>" + html.escape(line[2:].strip()) + "</li>")
        # Quote
        elif line.startswith(">"):
            lines.extend(["\t\t<blockquote>",
                          "\t\t\t<p>" + html.escape(line[1:].strip()) + "</p>",
                          "\t\t</blockquote>"])
        # Normal text
        else:
            lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
    # Close whatever is still open when the document ends
    if in_list:
        lines.append("\t\t</ul>")
    if in_pre:
        lines.append("\t\t</pre>")
    url_html = html_href(url, html.escape(url))
    original_url_html = html_href(original_url, html.escape(original_url))
    # status and meta come from the server, so they are escaped as well
    lines.extend(["",
                  "\t</article>",
                  "\t<details>",
                  "\t\t<summary>",
                  "\t\t\tContent from " + url_html,
                  "\t\t</summary>",
                  "\t\t<dl>",
                  "\t\t\t<dt>Original URL</dt>",
                  "\t\t\t<dd>" + original_url_html + "</dd>",
                  "\t\t\t<dt>Status</dt>",
                  "\t\t\t<dd>" + html.escape(status) + "</dd>",
                  "\t\t\t<dt>Meta</dt>",
                  "\t\t\t<dd>" + html.escape(meta) + "</dd>",
                  "\t\t\t<dt>Fetched by</dt>",
                  '\t\t\t<dd><a href="https://git.sr.ht/~sotirisp/qute-gemini">qute-gemini ' + str(_version) + "</a></dd>",
                  "\t\t</dl>",
                  "\t</details>",
                  "\t</body>",
                  "</html>"])
    return "\n".join(lines)
|
|
|
|
|
|
def get_css() -> str:
    """Return the contents of the custom CSS file.

    The file is looked up at the path returned by qute_gemini_css_path().
    The CSS is optional: when the file is missing or cannot be read an
    empty string is returned so that pages are rendered without CSS.
    """
    try:
        # The generated pages declare UTF-8, so read the CSS as UTF-8 too
        # instead of depending on the locale. Opening the file directly
        # avoids the gap between checking for the file and reading it.
        with open(qute_gemini_css_path(), "r", encoding="utf-8",
                  errors="replace") as f:
            return f.read().strip()
    except OSError:
        # Use no CSS
        return ""
|
|
|
|
|
|
def qute_error_page(url: str, description: str) -> str:
    """Return a data URI error page like qutebrowser does.

    url: The URL of the page that failed to load.
    description: A description of the error.
    Returns a data URI containing the error page.
    """
    # Imported here because the module does not import re at the top
    import re

    escaped_url = html.escape(url, quote=True)
    substitutions = {
        "URL_TEXT": escaped_url,
        "URL": escaped_url,
        "DESCRIPTION": html.escape(description),
        "CSS": get_css(),
    }
    # Fill in all placeholders in a single pass. URL_TEXT is listed before
    # URL so that the longer name wins, and text that has been inserted is
    # never scanned again, so a URL or description that happens to contain
    # a placeholder word is left alone.
    html_page = re.sub(r"URL_TEXT|URL|DESCRIPTION|CSS",
                       lambda match: substitutions[match.group(0)],
                       _error_page_template)
    # URL encode and return as a data URI
    return "data:text/html;charset=UTF-8," + urllib.parse.quote(html_page)
|
|
|
|
|
|
def open_gemini(url: str, open_args: str) -> None:
    """Open Gemini URL in qutebrowser.

    url: The Gemini URL to fetch and display.
    open_args: Extra arguments for qutebrowser's open command, may be
               empty.
    The page is converted to HTML and stored in a temporary file that
    qutebrowser is told to open. On failure an error page is opened
    instead.
    """
    # Get the Gemini content
    content, content_url, status, meta, error_msg = gemini_fetch_url(url)
    if error_msg:
        # Generate an error page in a data URI
        open_url = qute_error_page(url, error_msg)
    else:
        # Success, convert to HTML in a temporary file. The page declares
        # UTF-8, so it is written as UTF-8 regardless of the locale.
        page = gemtext_to_html(content, content_url, url, status, meta)
        # delete=False: the file has to outlive this script because
        # qutebrowser reads it after the command has been sent
        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False,
                                         encoding="utf-8") as tmpf:
            tmpf.write(page)
        open_url = "file://" + tmpf.name
    # Join the parts with single spaces and skip an empty open_args so that
    # the arguments and the URL can never run into each other
    command = " ".join(part for part in ("open", open_args, open_url) if part)
    # Open the HTML file in qutebrowser
    with open(qute_fifo(), "w") as qfifo:
        qfifo.write(command)
|
|
|
|
|
|
def open_other(url: str, open_args: str) -> None:
    """Hand a non-Gemini URL back to qutebrowser.

    url: The URL to open.
    open_args: Extra arguments for qutebrowser's open command.
    """
    command = " ".join(("open", open_args, url))
    fifo_path = qute_fifo()
    with open(fifo_path, "w") as qfifo:
        qfifo.write(command)
|
|
|
|
|
|
if __name__ == "__main__":
    # The script name decides where the page goes: a name ending in "-tab"
    # passes -t to the open command, any other name passes no arguments
    open_args = "-t" if sys.argv[0].endswith("-tab") else ""
    # Gemini URLs are fetched and rendered by this script, every other
    # scheme is passed on to qutebrowser as it is
    url = qute_url()
    parsed_url = urllib.parse.urlparse(url)
    opener = open_gemini if parsed_url.scheme == "gemini" else open_other
    opener(url, open_args)
|