#!/usr/bin/env python3
# qute-gemini - Open Gemini links in qutebrowser and render them as HTML
#
# SPDX-FileCopyrightText: 2019-2020 solderpunk
# SPDX-FileCopyrightText: 2020 Aaron Janse
# SPDX-FileCopyrightText: 2020 petedussin
# SPDX-FileCopyrightText: 2020-2021 Sotiris Papatheodorou
# SPDX-License-Identifier: GPL-3.0-or-later

# NOTE: the ``cgi`` module previously imported here was removed in
# Python 3.13 (PEP 594); media types are parsed with ``email.message``.
import email.message
import html
import os
import re
import socket
import ssl
import sys
import tempfile
import urllib.parse

from typing import Tuple

_version = "1.0.0"

_max_redirects = 5

# Placeholders URL, URL_TEXT, DESCRIPTION and CSS are substituted by
# qute_error_page().
_error_page_template = '''<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Error opening page: URL</title>
    <style>
        CSS
    </style>
</head>
<body>
    <h1>qute-gemini error</h1>
    <p>Error while opening:<br/><a href="URL">URL_TEXT</a></p>
    <p>DESCRIPTION</p>
</body>
</html>
'''

# Human readable descriptions of non-success status codes, keyed by the
# 2-digit code with a 1-digit fallback. META is replaced with the value of
# the meta field sent by the server.
_status_code_desc = {
    "1": "Gemini status code 1 Input. This is not implemented in "
         "qute-gemini.",
    "10": "Gemini status code 10 Input. This is not implemented in "
          "qute-gemini.",
    "11": "Gemini status code 11 Sensitive Input. This is not implemented "
          "in qute-gemini.",
    "3": "Gemini status code 3 Redirect. Stopped after "
         + str(_max_redirects) + " redirects.",
    "30": "Gemini status code 30 Temporary Redirect. Stopped after "
          + str(_max_redirects) + " redirects.",
    "31": "Gemini status code 31 Permanent Redirect. Stopped after "
          + str(_max_redirects) + " redirects.",
    "4": "Gemini status code 4 Temporary Failure. Server message: META",
    "40": "Gemini status code 40 Temporary Failure. Server message: META",
    "41": "Gemini status code 41 Server Unavailable. The server is "
          "unavailable due to overload or maintenance. Server message: META",
    "42": "Gemini status code 42 CGI Error. A CGI process, or similar "
          "system for generating dynamic content, died unexpectedly or "
          "timed out. Server message: META",
    "43": "Gemini status code 43 Proxy Error. A proxy request failed "
          "because the server was unable to successfully complete a "
          "transaction with the remote host. Server message: META",
    "44": "Gemini status code 44 Slow Down. Rate limiting is in effect. "
          "Please wait META seconds before making another request to this "
          "server.",
    "5": "Gemini status code 5 Permanent Failure. Server message: META",
    "50": "Gemini status code 50 Permanent Failure. Server message: META",
    "51": "Gemini status code 51 Not Found. The requested resource could "
          "not be found but may be available in the future. Server "
          "message: META",
    "52": "Gemini status code 52 Gone. The resource requested is no longer "
          "available and will not be available again. Server message: META",
    "53": "Gemini status code 53 Proxy Request Refused. The request was "
          "for a resource at a domain not served by the server and the "
          "server does not accept proxy requests. Server message: META",
    "59": "Gemini status code 59 Bad Request. The server was unable to "
          "parse the client's request, presumably due to a malformed "
          "request. Server message: META",
    "6": "Gemini status code 6 Client Certificate Required. This is not "
         "implemented in qute-gemini.",
}


def qute_url() -> str:
    """Get the URL passed to the script by qutebrowser."""
    return os.environ["QUTE_URL"]


def qute_fifo() -> str:
    """Get the FIFO or file to write qutebrowser commands to."""
    return os.environ["QUTE_FIFO"]


def html_href(url: str, description: str) -> str:
    """Return an HTML anchor element pointing to url.

    url: The link target. It is escaped for use inside a double-quoted
         attribute.
    description: The link text. It is inserted verbatim, so the caller
                 must already have HTML-escaped it.
    """
    return "".join(['<a href="', html.escape(url, quote=True), '">',
                    description, "</a>"])


def qute_gemini_css_path() -> str:
    """Return the path where the custom CSS file is expected to be."""
    # An unset *or empty* XDG_DATA_HOME falls back to ~/.local/share
    base_dir = os.environ.get("XDG_DATA_HOME")
    if not base_dir:
        base_dir = os.path.join(os.path.expanduser("~"), ".local/share")
    return os.path.join(base_dir, "qutebrowser/userscripts/qute-gemini.css")


def gemini_absolutise_url(base_url: str, relative_url: str) -> str:
    """Absolutise relative gemini URLs.

    URLs that already contain "://" are returned unchanged.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    if "://" in relative_url:
        return relative_url
    gemini_prefix = "gemini://"
    http_prefix = "http://"
    if not base_url.startswith(gemini_prefix):
        return urllib.parse.urljoin(base_url, relative_url)
    # urljoin() only resolves relative references for schemes it knows, so
    # temporarily disguise the base URL as HTTP. Only the leading scheme is
    # swapped so that URLs embedded in paths or queries are left alone.
    joined = urllib.parse.urljoin(
        http_prefix + base_url[len(gemini_prefix):], relative_url)
    if joined.startswith(http_prefix):
        joined = gemini_prefix + joined[len(http_prefix):]
    return joined


def _gemini_request(parsed_url: urllib.parse.ParseResult,
                    url: str) -> Tuple[str, str, bytes]:
    """Perform a single Gemini transaction.

    Returns the status code, the meta field and, for 2x responses only,
    the raw response body. Raises OSError on network/TLS failures and
    ValueError on an unusable URL or undecodable header.
    """
    host = parsed_url.hostname
    if not host:
        raise ValueError("no host name in URL")
    port = parsed_url.port if parsed_url.port is not None else 1965
    # Gemini servers commonly use self-signed certificates, so no
    # certificate verification is performed.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    with socket.create_connection((host, port)) as raw_sock:
        # SNI takes the bare host name, not the netloc (which may carry a
        # port).
        with context.wrap_socket(raw_sock, server_hostname=host) as tls_sock:
            tls_sock.sendall((url + "\r\n").encode("UTF-8"))
            with tls_sock.makefile("rb") as fp:
                header = fp.readline().decode("UTF-8").strip()
                # Split only once: meta may itself contain whitespace,
                # e.g. "text/gemini; charset=utf-8" or an error message.
                fields = header.split(maxsplit=1)
                status = fields[0] if fields else ""
                meta = fields[1] if len(fields) > 1 else ""
                body = fp.read() if status.startswith("2") else b""
    return status, meta, body


def gemini_fetch_url(url: str) -> Tuple[str, str, str, str, str]:
    """Fetch a Gemini URL and return the content as a string.

    url: URL with gemini:// or no scheme.

    Returns 5 strings: the content, the URL the content was fetched from,
    the Gemini status code, the value of the meta field and an error
    message. The error message is empty on success; the content is empty
    on failure.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    parsed_url = urllib.parse.urlparse(url)
    if not parsed_url.scheme:
        url = "gemini://" + url
        parsed_url = urllib.parse.urlparse(url)

    status = ""
    meta = ""
    body = b""
    redirects = 0
    # Do the Gemini transaction, looping for redirects
    try:
        while True:
            # Also checked after each redirect, which may leave Gemini
            if parsed_url.scheme != "gemini":
                return ("", url, status, meta,
                        "Received non-gemini:// URL: " + url)
            status, meta, body = _gemini_request(parsed_url, url)
            if not status.startswith("3"):
                break
            url = gemini_absolutise_url(url, meta)
            parsed_url = urllib.parse.urlparse(url)
            redirects += 1
            if redirects > _max_redirects:
                # Too many redirects, reported through the 3x description
                break
    except (OSError, ValueError) as err:
        return "", url, status, meta, "Could not fetch the URL: " + str(err)

    content = ""
    error_msg = ""
    if status.startswith("2"):
        # 2x Success. An empty meta defaults to UTF-8 encoded text/gemini.
        header = email.message.Message()
        header["Content-Type"] = meta or "text/gemini; charset=utf-8"
        media_type = header.get_content_type()
        if media_type == "text/gemini":
            charset = header.get_content_charset("utf-8")
            try:
                content = body.decode(charset)
            except (LookupError, UnicodeDecodeError) as err:
                error_msg = "Could not decode the response: " + str(err)
        else:
            error_msg = "Expected media type text/gemini but received " \
                        + media_type
    else:
        # Try matching a 2-digit and then a 1-digit status code
        error_msg = (_status_code_desc.get(status[0:2])
                     or _status_code_desc.get(status[0:1])
                     or "The server sent back something weird.")
        # Substitute the contents of meta into the error message if needed
        error_msg = error_msg.replace("META", meta)
    return content, url, status, meta, error_msg


def gemtext_to_html(gemtext: str, url: str, original_url: str,
                    status: str, meta: str) -> str:
    """Convert gemtext to HTML.

    gemtext: The gemtext document to convert.
    url: The URL the gemtext was received from. Used as the document title
         and to resolve relative URLs in the gemtext content.
    original_url: The URL the original request was made at.
    status: The Gemini status code returned by the server.
    meta: The meta returned by the server.

    Returns the HTML representation as a string.
    """
    # Accumulate converted gemtext lines
    lines = ['<!DOCTYPE html>',
             '<html>',
             "\t<head>",
             '\t\t<meta charset="UTF-8">',
             "\t\t<title>" + html.escape(url) + "</title>",
             "\t\t<style>" + get_css() + "</style>",
             "\t</head>",
             "\t<body>",
             "\t<article>"]
    in_pre = False
    in_list = False
    for line in gemtext.splitlines():
        # Preformatted toggle. Checked first because while preformatted,
        # every other line type must be passed through literally.
        if line.startswith("```"):
            if in_list:
                lines.append("\t\t</ul>")
                in_list = False
            lines.append("\t\t</pre>" if in_pre else "\t\t<pre>")
            in_pre = not in_pre
            continue
        # Preformatted, keep as is (including blank lines) but escaped
        if in_pre:
            lines.append(html.escape(line))
            continue
        is_list_item = line.startswith("* ")
        # Add the list closing tag
        if in_list and not is_list_item:
            lines.append("\t\t</ul>")
            in_list = False
        # Blank line, ignore
        if not line.strip():
            continue
        # Link
        if line.startswith("=>"):
            parts = line[2:].split(None, 1)
            if not parts:
                # Link line without a target, nothing to render
                continue
            # Use the URL itself as the description if there is none
            label = html.escape(parts[1] if len(parts) > 1 else parts[0])
            # Resolve relative URLs
            target = gemini_absolutise_url(url, parts[0])
            lines.append("\t\t<p>" + html_href(target, label) + "</p>")
        # Headers, longest prefix first
        elif line.startswith("###"):
            lines.append("\t\t<h3>" + html.escape(line[3:].strip())
                         + "</h3>")
        elif line.startswith("##"):
            lines.append("\t\t<h2>" + html.escape(line[2:].strip())
                         + "</h2>")
        elif line.startswith("#"):
            lines.append("\t\t<h1>" + html.escape(line[1:].strip())
                         + "</h1>")
        # List
        elif is_list_item:
            if not in_list:
                lines.append("\t\t<ul>")
                in_list = True
            lines.append("\t\t\t<li>" + html.escape(line[2:].strip())
                         + "</li>")
        # Normal text
        else:
            lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
    # Close anything the document left open
    if in_pre:
        lines.append("\t\t</pre>")
    if in_list:
        lines.append("\t\t</ul>")

    # Footer with information about the request
    url_html = html_href(url, html.escape(url))
    original_url_html = html_href(original_url, html.escape(original_url))
    lines.extend(["\t</article>",
                  "\t<details>",
                  "\t\t<summary>",
                  "\t\t\tContent from " + url_html,
                  "\t\t</summary>",
                  "\t\t<dl>",
                  "\t\t\t<dt>Original URL</dt>",
                  "\t\t\t<dd>" + original_url_html + "</dd>",
                  "\t\t\t<dt>Status</dt>",
                  "\t\t\t<dd>" + html.escape(status) + "</dd>",
                  "\t\t\t<dt>Meta</dt>",
                  "\t\t\t<dd>" + html.escape(meta) + "</dd>",
                  "\t\t\t<dt>Fetched by</dt>",
                  # NOTE(review): link target assumed to be the upstream
                  # repository; confirm.
                  '\t\t\t<dd><a href="https://git.sr.ht/~sotirisp/'
                  'qute-gemini">qute-gemini ' + str(_version) + "</a></dd>",
                  "\t\t</dl>",
                  "\t</details>",
                  "\t</body>",
                  "</html>"])
    return "\n".join(lines)


def get_css() -> str:
    """Return the contents of the custom CSS file or "" if there is none."""
    css_file = qute_gemini_css_path()
    if not os.path.isfile(css_file):
        # Use no CSS
        return ""
    with open(css_file, "r", encoding="utf-8") as f:
        return f.read().strip()


def qute_error_page(url: str, description: str) -> str:
    """Return a data URI error page like qutebrowser does.

    url: The URL of the page that failed to load.
    description: A description of the error.

    Returns a data URI containing the error page.
    """
    substitutions = {
        "URL_TEXT": html.escape(url),
        "URL": html.escape(url, quote=True),
        "DESCRIPTION": html.escape(description),
        "CSS": get_css(),
    }
    # Substitute in a single pass so that placeholder names occurring in
    # the inserted values are not expanded again. URL_TEXT is listed before
    # URL so that the longer placeholder wins.
    html_page = re.sub("URL_TEXT|URL|DESCRIPTION|CSS",
                       lambda match: substitutions[match.group(0)],
                       _error_page_template)
    # URL encode and return as a data URI
    return "data:text/html;charset=UTF-8," + urllib.parse.quote(html_page)


def _qute_open(url: str, open_args: str) -> None:
    """Tell qutebrowser to open url, passing open_args to :open."""
    with open(qute_fifo(), "w") as qfifo:
        qfifo.write("open " + open_args + " " + url)


def open_gemini(url: str, open_args: str) -> None:
    """Open Gemini URL in qutebrowser."""
    # Get the Gemini content
    content, content_url, status, meta, error_msg = gemini_fetch_url(url)
    if error_msg:
        # Generate an error page in a data URI
        open_url = qute_error_page(url, error_msg)
    else:
        # Success, convert to HTML in a temporary file. The file is kept
        # (delete=False) so that qutebrowser can load it afterwards.
        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False,
                                         encoding="utf-8") as tmpf:
            tmpf.write(gemtext_to_html(content, content_url, url, status,
                                       meta))
        open_url = "file://" + tmpf.name
    _qute_open(open_url, open_args)


def open_other(url: str, open_args: str) -> None:
    """Open non-Gemini URL in qutebrowser."""
    _qute_open(url, open_args)


def main() -> None:
    """Open the URL supplied by qutebrowser according to its scheme."""
    # Open in the current or a new tab depending on the script name
    open_args = "-t" if sys.argv[0].endswith("-tab") else ""
    url = qute_url()
    if urllib.parse.urlparse(url).scheme == "gemini":
        open_gemini(url, open_args)
    else:
        open_other(url, open_args)


if __name__ == "__main__":
    main()