diff --git a/gemfeed.py b/gemfeed.py index 39dd8ad..df3a764 100644 --- a/gemfeed.py +++ b/gemfeed.py @@ -9,10 +9,32 @@ import urllib.parse from feedgen.feed import FeedGenerator def is_world_readable(filename): + """ + Return True if the named file is world readable, otherwise return False. + """ st = os.stat(filename) return st.st_mode & stat.S_IROTH +def extract_first_heading(filename, default=""): + """ + Open a file which is presumed to contain text/gemini content and return + the contents of the first heading line (regardless of heading level). + If no heading lines are found, return the specified default. + """ + with open(filename) as fp: + for line in fp: + if line.startswith("#"): + while line[0] == "#": + line = line[1:] + return line.strip() + return default + def get_feed_title(): + """ + If an index.gmi or index.gemini file exists and is world readable, return + the content of the first heading line in the file, otherwise return a + default feed title. + """ default = "Just another Gemini feed" for index_file in ("index.gmi", "index.gemini"): if os.path.exists(index_file) and is_world_readable(index_file): @@ -20,6 +42,10 @@ def get_feed_title(): return default def find_files(n=10): + """ + Return the n most recently created world readable files with extensions of + .gmi or .gemini, as a list sorted from most to least recent. + """ files = [] for extension in ("gmi", "gemini"): files.extend(glob.glob("*.{}".format(extension))) @@ -30,16 +56,31 @@ def find_files(n=10): files.sort(key=os.path.getctime, reverse=True) return files[0:n] -def extract_first_heading(filename, default=""): - with open(filename) as fp: - for line in fp: - if line.startswith("#"): - while line[0] == "#": - line = line[1:] - return line.strip() - return default +def urljoin(base, url): + """ + Return an absolute URL formed by combining the provided base and relative + URLs. 
+ + This is necessary because the various functions in Python's urllib to do + this do not function as expected if the URL scheme is not recognised, + which of course gemini:// is not. Thus, we need to do a little dance + where we transform gemini URLs to https URLs, join them, and then undo + the transformation. + """ + base = urllib.parse.urlsplit(base) + base = base._replace(scheme="https") + base = urllib.parse.urlunsplit(base) + joined = urllib.parse.urljoin(base, url) + joined = urllib.parse.urlsplit(joined) + joined = joined._replace(scheme="gemini") + return urllib.parse.urlunsplit(joined) def populate_entry_from_file(filename, base_url, entry): + """ + Set the id, title, updated and link attributes of the provided + FeedGenerator entry object according to the contents of the named + Gemini file and the base URL. + """ url = urljoin(base_url, filename) entry.guid(url) entry.link(href=url, rel="alternate") @@ -49,15 +90,6 @@ def populate_entry_from_file(filename, base_url, entry): title = extract_first_heading(filename, filename) entry.title(title) -def urljoin(base, url): - base = urllib.parse.urlsplit(base) - base = base._replace(scheme="https") - base = urllib.parse.urlunsplit(base) - joined = urllib.parse.urljoin(base, url) - joined = urllib.parse.urlsplit(joined) - joined = joined._replace(scheme="gemini") - return urllib.parse.urlunsplit(joined) - def main(): # Get default title from index page, if there is one