#!/usr/bin/env python3

"""
uriel 1.3.5

Yet another static website generator.

Named for the archangel Uriel in the novel Unsong, whose job was to perform
the fantastic and mundane work necessary to keep the world functioning.

Copyright 2021-2025 Nathan Rosenquist

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""

import ast
import calendar
import datetime
import hashlib
import os
import re
import shutil
import subprocess
import sys
import time
import traceback

# the name of the program
PROGRAM_NAME = "uriel"

# exit codes
EXIT_OK = 0
EXIT_FAIL = 1

# names of the sub-directories under the project root directory
STATIC_ROOT = "static"
NODES_ROOT = "nodes"
TEMPLATES_ROOT = "templates"
LIB_ROOT = "lib"
PUBLIC_ROOT = "public"

# node index filename
NODE_INDEX = "index"

# html index filename
HTML_INDEX = "index.html"

# default template to use when rendering nodes
# (a Page falls back to this when its node has no "template" header)
DEFAULT_TEMPLATE = "default.html"

# HTML escape character map
# (literal character -> the HTML entity that stands in for it)
HTML_ESCAPE_MAP = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;"
}

# maximum number of RSS feed entries
RSS_MAX_ENTRIES = 20

# maximum number of sitemap entries
SITEMAP_MAX_ENTRIES = 50000

# cache of static files that already have a hash-named copy
# absolute path of the static file on disk -> URL of its hash-named copy
# (filled in by Page.merge_token_static_hash_url)
static_hash_urls = {}

class UrielError(Exception):
    """
    Error raised for problems that should stop the program and be reported
    to the user as a plain message, with no stack trace.

    """


class SojuError(Exception):
    """
    Error attributed to user-defined soju code. It is meant to be reported
    to the user as a plain message, with no stack trace.

    """


class HandlerError(Exception):
    """
    Error attributed to user-defined handler code. It is meant to be
    reported to the user as a plain message, with no stack trace.

    """


class Token:
    """
    Represents a single token, either a literal or a substitution parameter.

    Lines are parsed one at a time. Each line can be divided into one or
    more tokens. A token is either a literal, or a substitution parameter,
    e.g. {{foo:bar}}

    Fields set by the constructor:

        original_string         the exact text the token was made from
        type                    one of the type constants below
        value                   for a recognized parameter, the text after
                                the first colon with surrounding whitespace
                                removed; otherwise the original text
        unidentified_parameter  True when the text was wrapped in {{ }} but
                                had no keyword, or one we do not recognize

    """

    # possible types
    # {{$TOKEN_TYPE_CONSTANT:*}}
    # all are lvalues for substitution parameters, except for "literal",
    # which is just a literal string
    LITERAL = "literal"
    VALUE = "value"
    VALUE_UNESCAPED = "value-unescaped"
    INCLUDE = "include"
    BREADCRUMBS = "breadcrumbs"
    CREATED = "created"
    MODIFIED = "modified"
    STATIC_URL = "static-url"
    STATIC_HASH_URL = "static-hash-url"
    RSS = "rss"
    NODE = "node"
    NODE_URL = "node-url"
    NODE_NAME = "node-name"
    NODE_TITLE = "node-title"
    NODE_LINK = "node-link"
    NODE_LIST = "node-list"
    TAG_LIST = "tag-list"
    SOJU = "soju"

    # allowed lvalue keywords inside of a {{ parameter }}
    # (e.g. {{some_keyword:*}})
    # (all are allowed except "literal")
    KEYWORDS = {
        VALUE,
        VALUE_UNESCAPED,
        INCLUDE,
        CREATED,
        MODIFIED,
        BREADCRUMBS,
        STATIC_URL,
        STATIC_HASH_URL,
        RSS,
        NODE,
        NODE_URL,
        NODE_NAME,
        NODE_TITLE,
        NODE_LINK,
        NODE_LIST,
        TAG_LIST,
        SOJU,
    }

    def __init__(self, s):
        """
        Accepts a fragment of text, which may or may not be a parameter.

        Sets the "type" and "value" fields according to the string contents.

        Text that is wrapped in {{ }} but does not start with a recognized
        "keyword:" is kept as a literal, and flagged so the caller can
        report it (see has_unidentified_parameter()).

        """

        # remember the original string that was used to make the token
        self.original_string = s

        # does it look like this was supposed to be a parameter,
        # but we don't understand it?
        self.unidentified_parameter = False

        # literal
        if not (s.startswith("{{") and s.endswith("}}")):
            self.type = Token.LITERAL
            self.value = s
            return

        # parameter: strip off the {{ and }}, then split "key: value"
        # at the first colon only, so the value may contain colons itself
        (keyword, separator, value) = s[2:-2].partition(":")
        keyword = keyword.strip()

        # a parameter needs both a colon and a keyword we understand
        if separator and keyword in Token.KEYWORDS:
            self.type = keyword
            self.value = value.strip()
            return

        # if we didn't find a token type that we understood,
        # make a note of it, but pass the entire text on as a literal
        self.unidentified_parameter = True
        self.type = Token.LITERAL
        self.value = s

    def has_unidentified_parameter(self):
        """
        Does this token contain something that looks syntactically like a
        substitution parameter, but not one that we recognize?

        """

        return self.unidentified_parameter

    def __eq__(self, other):
        """
        Two tokens are equal when their type, value, and original string
        all match.

        Comparing with something that is not a Token returns NotImplemented,
        so "token == other" evaluates to False instead of failing on a
        missing attribute.

        """

        if not isinstance(other, Token):
            return NotImplemented

        mine = (self.type, self.value, self.original_string)
        theirs = (other.type, other.value, other.original_string)

        return mine == theirs

    def __str__(self):
        """
        Short description of the token, for debugging.

        """

        return "[token type=%s, value='%s']" % (self.type, self.value)

    def __repr__(self):
        """
        Same text as __str__().

        """

        return self.__str__()


class TemplateStack:
    """
    Keeps track of the templates that are currently being included.

    A node or a template can include another template, which can include
    another one, and so on. The templates are held in the order they were
    included, oldest first.

    """

    def __init__(self):
        """
        Start out with no templates.

        """

        self.templates = []

    def push(self, template, node_path, in_node_body):
        """
        Put a template on top of the stack.

        Accepts the template, the path of the node being rendered, and
        whether the include came from inside the node body.

        The node path and the node body flag are only used for error
        reporting. If the template is already on the stack, the include
        chain is logged (which empties the stack), and a UrielError is
        raised.

        """

        # the usual case: no loop, just remember the template
        if template not in self.templates:
            self.templates.append(template)
            return

        def arrow(marker, depth):
            # a three character marker, padded on the right according
            # to the indentation depth
            return marker + (" " * ((depth * 2) - 3))

        log("include loop error:")

        # the node that was being rendered comes first
        log("%s%s/%s" % (indent_spaces(1), NODES_ROOT, node_path))

        # then the templates, oldest first, with arrows marking the
        # templates from the start of the loop onward
        depth = 2
        inside_loop = False
        while self.has_more_elements():
            current = self.shift()

            if current == template:
                inside_loop = True
                log("%s%s/%s <<< LOOP STARTS HERE" %
                    (arrow(">>>", depth), TEMPLATES_ROOT, current))
            elif inside_loop:
                log("%s%s/%s" % (arrow("  >", depth), TEMPLATES_ROOT, current))
            else:
                log("%s%s/%s" %
                    (indent_spaces(depth), TEMPLATES_ROOT, current))

        # when the include came from the node body, show the path that
        # leads from {{node:body}} back to the include, one level deeper
        # for each step
        if in_node_body:
            depth += 1
            log("%s{{node:body}}" % (arrow("  >", depth)))

            depth += 1
            log("%s%s/%s" % (arrow("  >", depth), NODES_ROOT, node_path))

            depth += 1
            log("%s{{include:%s}}" % (arrow("  >", depth), template))

            # the repeated template goes one level below the include
            depth += 1

        # the template that would have been included a second time
        log("%s%s/%s <<< WOULD REPEAT FOREVER" %
            (arrow(">>>", depth), TEMPLATES_ROOT, template))

        raise UrielError(
            "include loop detected in: '%s/%s'" %
            (TEMPLATES_ROOT, template))

    def pop(self):
        """
        Take the newest template off of the stack and return it.

        """

        newest = self.templates.pop()

        return newest

    def shift(self):
        """
        Take the oldest template off of the stack and return it.

        """

        (oldest, self.templates) = (self.templates[0], self.templates[1:])

        return oldest

    def has_more_elements(self):
        """
        Is there at least one template left on the stack?

        """

        return len(self.templates) > 0


class Page:
    """
    A Page is a combination of a Node and a template.

    """

    def __init__(self, project_root, node, use_canonical_url=False):
        """
        Accepts the project root, and the Node to render.

        Optionally accepts use_canonical_url, a boolean that influences
        whether substitution parameter URLs are rewritten to use the
        Canonical-URL. The default is False.

        The template comes from the "template" header of the node, or
        DEFAULT_TEMPLATE when the node does not have that header.

        """

        self.project_root = project_root
        self.node = node

        # whether links should be written with the canonical URL
        self.use_canonical_url = use_canonical_url

        # line_error() can end up being called again while an earlier
        # error is still working its way out. this remembers that the
        # first error was already printed, so it is only shown once
        self.line_error_exception_raised = False

        # greater than zero while {{node:body}} is being merged,
        # which is how a {{node:body}} inclusion loop is noticed
        self.node_body_semaphore = 0

        # the templates being included right now, in order, so that
        # error messages can show where we are
        self.template_stack = TemplateStack()

        # the node can pick its own template, otherwise use the default
        self.template = (node.get_header("template")
                         if node.has_header("template")
                         else DEFAULT_TEMPLATE)

    def line_error(self, token, reason, raise_exception=True):
        """
        Log a parameter error stack trace.

        Accepts a Token, the reason for the error, and whether or not
        this method should raise an exception (by default it does).

        """

        # if this is the first time this method has raised an exception,
        # print out debugging information to highlight the precise
        # location where the user can troubleshoot the error
        if not self.line_error_exception_raised:
            # are we using a template? (or is the template "null")
            has_template = self.template_stack.has_more_elements()

            # indent errors as we work our way down
            # from the node, through the templates, to wherever the problem is
            indent = 1

            log("parameter error:")

            # node
            log("%s%s/%s" %
                (indent_spaces(indent),
                 NODES_ROOT,
                 self.node.get_path()))

            # templates
            if has_template:
                indent += 1
                while self.template_stack.has_more_elements():
                    template = self.template_stack.shift()
                    log("%s%s/%s" %
                        (indent_spaces(indent),
                         TEMPLATES_ROOT,
                         template))

            # {{node:body}}
            #
            # if we ran into an error while we were processing the contents
            # of the node itself (e.g. {{node:body}}), list the node again,
            # since it is the file the user should go look at to find the
            # error. however, we only want to print the node twice if we
            # are using a template, otherwise it's just redundant.
            if has_template and (self.node_body_semaphore > 0):
                indent += 1
                log("%s%s/%s" %
                    (indent_spaces(indent),
                     NODES_ROOT,
                     self.node.get_path()))

            # token
            indent += 1
            log("%s'%s'" % (indent_spaces(indent), token.original_string))

        # if we were asked to raise an exception, raise it now
        if raise_exception:
            # remember that line_error() raised an exception
            # in case we end up here again before we exit
            self.line_error_exception_raised = True

            # raise the exception
            raise UrielError(reason)

        # otherwise, print out the error as a log message
        else:
            indent += 1
            log("%s%s" % (indent_spaces(indent), reason))

    def node_body_loop_error(self, token):
        """
        Log a stack trace for a {{node:body}} inclusion loop, then raise
        a UrielError.

        Accepts the Token that triggered the error.

        The templates on the template stack are removed from the stack as
        they are logged.

        """

        # this trace does not mark where the loop starts and stops.
        #
        # all we have here is the node and the stack of templates, and
        # not a record of what included what, and from where. an include
        # can come from a template or from the node body, and a normal
        # template stack pulls in the node body from one of the templates.
        #
        # it only becomes a loop when the node body ends up including
        # the node body again. rather than carry extra bookkeeping around
        # just for this one error, we list the node, the templates, and
        # the token that were involved, and leave it at that

        log("node body include loop error:")

        # each step down the include chain is indented one more level
        depth = 1

        # check for templates before any of them are taken off the stack
        has_templates = self.template_stack.has_more_elements()

        # the node being rendered
        log("%s%s/%s" %
            (indent_spaces(depth), NODES_ROOT, self.node.get_path()))

        if has_templates:
            # every template, oldest first
            depth += 1
            while self.template_stack.has_more_elements():
                included = self.template_stack.shift()
                log("%s%s/%s" %
                    (indent_spaces(depth), TEMPLATES_ROOT, included))

            # the node a second time, below the templates
            # (with no templates in between, that would just be redundant)
            depth += 1
            log("%s%s/%s" %
                (indent_spaces(depth), NODES_ROOT, self.node.get_path()))

        # the {{node:body}} token itself
        depth += 1
        log("%s'%s'" % (indent_spaces(depth), token.original_string))

        message = "node body include loop detected in node: '%s'" % \
            (self.node.get_path())

        raise UrielError(message)

    def tokenize(self, line):
        """
        Split a single line of text into Token instances.

        Text outside of {{ }} becomes one token per run of text. Each
        "{{ ... }}" span, up to and including the first "}}" after it
        opens, becomes a token of its own. If a "{{" is never closed, the
        rest of the line from that point on becomes a single token.

        Returns a list of Token, which is empty for an empty line.

        """

        tokens = []

        # position in the line of the next character that has not been
        # turned into a token yet
        pos = 0
        end_of_line = len(line)

        while pos < end_of_line:
            opening = line.find("{{", pos)

            # no more parameters, the rest of the line is one token
            if opening == -1:
                tokens.append(Token(line[pos:]))
                break

            # text in front of the next parameter
            if opening > pos:
                tokens.append(Token(line[pos:opening]))
                pos = opening

            # we're at the "{{" now, look for the "}}" that closes it
            closing = line.find("}}", pos)

            # we have an opening {{, but no }}
            if closing == -1:
                tokens.append(Token(line[pos:]))
                break

            # scoop up the "{{ ... }}" part and make it into a token
            after_parameter = closing + 2
            tokens.append(Token(line[pos:after_parameter]))
            pos = after_parameter

        return tokens

    def text_to_html(self, text):
        """
        Put a <br> tag in front of every newline in the text.

        Returns the new string. The last line does not get a <br> tag
        unless the text ends with a newline.

        """

        return text.replace("\n", "<br>\n")

    def get_node_by_path(self, token, path):
        """
        Get the Node that corresponds with the given path.

        Accepts a token, and a path (e.g. index, foo/index)

        Returns the requested Node.

        Raises a UrielError if the node can not be found.

        """

        # get the root node to start our search
        root = self.node.get_root_node()

        # find the node that matches the path, and return it
        try:
            node = root.find_node_by_path(path)
        except UrielError as e:
            self.line_error(token, "node not found: '%s'" % (path))

        return node

    def create_breadcrumbs(self):
        """
        Create a breadcrumbs HTML fragment for the Node associated with
        this Page.

        Return the HTML fragment as a string.

        """

        # list of nodes, starting from the leaf node and
        # going back up to the root
        nodes = []

        # list of breadcrumb HTML fragments, in the order we want
        breadcrumbs = []

        # build a list of the nodes from the leaf up to the root
        cur = self.node
        while True:
            nodes.append(cur)

            if cur.parent_node is None:
                break

            cur = cur.parent_node

        # remove the last element from the node list (the root)
        # because we don't want to include the home page in breadcrumbs
        nodes = nodes[:-1]

        # go through the nodes from (almost) the root to the bottom,
        # in the order that we want the breadcrumb HTML to appear
        first_breadcrumb = True
        for cur_node in reversed(nodes):
            if not first_breadcrumb:
                breadcrumbs.append(self.node.get_breadcrumb_separator())

            breadcrumbs.append(self.get_node_link(cur_node))

            first_breadcrumb = False

        return "".join(breadcrumbs)

    def get_soju_result(self, code):
        """
        Run the given code in the soju module, and return the result.

        Accepts a string containing a python expression. It is evaluated
        as an attribute of the soju module (the text "soju." is put in
        front of it), so "foo(node)" ends up calling soju.foo(node).

        Returns the value of the expression, which must be a str.

        Raises UrielError if there is no "soju" name in this module's
        globals. Raises SojuError if the code is not syntactically valid
        python, or if the result is not exactly of type str. Any other
        exception raised while evaluating the code is not caught here.

        """

        # make sure the soju module has been imported
        # (the path is only worked out for the error message; this check
        # looks at this module's global names, not at the filesystem)
        if "soju" not in globals():
            soju_file = os.path.join(self.project_root, LIB_ROOT, "soju.py")
            raise UrielError("soju module not found, '%s' does not exist" %
                             (soju_file))

        # set up the local environment so the code fragment passed in can
        # refer to these variables without qualifying them
        #
        # eval() below sees the local names of this method. that makes
        # the names of these variables part of what user code can rely on,
        # so they should not be renamed or removed casually
        page = self
        node = self.node
        project_root = self.project_root
        use_canonical_url = self.use_canonical_url

        # don't export the "self" reference from this method into eval code
        del(self)

        # check whether the syntax of the code inside of the soju token value
        # is syntactically valid python. this does not execute the code yet,
        # and the symbols don't necessarily have to exist. it just checks
        # for things like mismatched parenthesis, and that sort of thing,
        # so we can report them without a full stack trace.
        #
        # by explicitly checking this before we run the code, and not
        # just catching SyntaxError when the code is executed, we still
        # allow SyntaxError exceptions within the user-defined code to bubble
        # up, in case they eval code of their own in a function, etc.
        #
        # note that the check is done on the code by itself, while the
        # text that gets evaluated below has "soju." in front of it
        try:
            ast.parse(code)
        except SyntaxError as e:
            raise SojuError("syntax error: " + e.msg)

        # execute the user-defined code, and save the result
        # NOTE(review): eval() runs whatever expression it is given, with
        # the full privileges of this program. this presumably only ever
        # receives text written by the site's own author -- confirm
        result = eval("soju." + code)

        # if the user-defined code didn't return a string, report the error
        # (this is an exact type check, a subclass of str is rejected too)
        if type(result) is not str:
            err = "%s returned %s instead of string"
            raise SojuError(err % (code, str(type(result))))

        return result

    def get_node_url(self, node):
        """
        Get the URL for the node.

        """

        # canonical
        if self.use_canonical_url:
            return node.get_canonical_url()

        # non-canonical
        return node.get_url()

    def get_node_link(self, node):
        """
        Get a link to the node, using the node title as the link text.

        """

        # canonical
        if self.use_canonical_url:
            return node.get_canonical_link()

        # non-canonical
        return node.get_link()

    def get_static_url(self, node, url):
        """
        Get the static URL, in relation to the node.

        """

        # canonical
        if self.use_canonical_url:
            if url.startswith("/"):
                root_node = self.node.get_root_node()
                url = url[1:]
                return root_node.get_canonical_url() + url

            return node.get_canonical_url() + url

        # non-canonical
        return url

    def get_maybe_canonical_html_fragment(self, node, header_basename):
        """
        Get the canonical or non-canonical HTML fragment associated with the
        given header basename.

        Accepts Node, and header basename (e.g. __node-list-html)

        Returns the value from the given header, or the version of the
        header that has canonical URLs.

        """

        # canonical
        if self.use_canonical_url:
            header = header_basename + "-canonical"

        # non-canonical
        else:
            header = header_basename

        return node.get_header(header)

    def merge_token_literal(self, token):
        """
        Merge a literal token and return the results.

        """

        if token.has_unidentified_parameter():
            self.line_error(token,
                            "unidentified parameter: '%s'" % (token.value))

        return token.value

    def merge_token_value(self, token):
        """
        Merge a {{value:*}} token and return the results.

        """

        try:
            unescaped_value = self.node.get_header(token.value)
            escaped_value = escape(unescaped_value)
            return escaped_value
        except KeyError:
            self.line_error(token,
                            "header '%s' not set on this node" % \
                            (token.value))

    def merge_token_value_unescaped(self, token):
        """
        Merge a {{value-unescaped:*}} token and return the results.

        """

        try:
            return self.node.get_header(token.value)
        except KeyError:
            self.line_error(token,
                            "header '%s' not set on this node" % \
                            (token.value))

    def merge_token_include(self, token):
        """
        Merge an {{include:*}} token and return the results.

        The value of the token is the name of a template, relative to the
        templates directory of the project. A template that does not exist
        on disk is reported through line_error(). Otherwise, the result of
        merge_template() for that template is returned.

        """

        template_name = token.value

        # where the template lives, relative to the project root
        relative_path = os.path.join(TEMPLATES_ROOT, template_name)

        # the include has to exist on disk
        if not os.path.exists(os.path.join(self.project_root, relative_path)):
            self.line_error(
                token,
                "include file not found: '%s'" % (relative_path))

        return self.merge_template(template_name)

    def merge_token_created(self, token):
        """
        Merge a {{created:*}} token and return the results.

        """

        if self.node.created is None:
            self.line_error(token, "'Created' header not set")

        try:
            formatted_date = self.node.created.strftime(token.value)
            return escape(formatted_date)
        except ValueError:
            self.line_error(token,
                            "invalid date format string: '%s'" %
                            (token.value))

    def merge_token_modified(self, token):
        """
        Merge a {{modified:*}} token and return the results.

        """

        if self.node.modified is None:
            self.line_error(token, "'Modified' header not set")

        try:
            formatted_date = self.node.modified.strftime(token.value)
            return escape(formatted_date)
        except ValueError:
            self.line_error(token,
                            "invalid date format string: '%s'" %
                            (token.value))

    def merge_token_breadcrumbs(self, token):
        """
        Merge a {{breadcrumbs:*}} token and return the results.

        """

        if "*" == token.value:
            return self.create_breadcrumbs()

        self.line_error(
            token,
            "invalid value for breadcrumbs parameter: '%s'" %
            (token.value))

    def get_static_url_abspath(self, token, url):
        """
        Get the absolute path on disk to the given static URL path.

        """

        # disallow ../ links for static URLs
        if -1 != url.find("../"):
            self.line_error(
                token,
                "directory traversal not allowed in static URL: '%s'" %
                (url))

        # if the static url path starts with a slash, and is
        # relative to the root of the site, start the directory path
        # from PUBLIC_ROOT
        if url.startswith("/"):
            root_node = self.node.get_root_node()
            public_root_dir = root_node.get_dest_dir()
            static_url_dirent = public_root_dir + "/" + url

        # if the static url path does not start with a slash,
        # start the directory path relative to the node that
        # included the static url reference
        else:
            static_url_dirent = self.node.get_dest_dir() + "/" + url

        return os.path.abspath(static_url_dirent)

    def merge_token_static_url(self, token):
        """
        Merge a {{static-url:*}} token and return the results.

        """

        url = token.value

        # get the path on disk to the URL path
        static_url_abspath = self.get_static_url_abspath(token, url)

        # make sure the file or directory exists on the filesystem
        if not os.path.exists(static_url_abspath):
            self.line_error(token,
                            "file not found: '%s'" %
                            (os.path.join(STATIC_ROOT, url)))

        return self.get_static_url(self.node, url)

    def merge_token_static_hash_url(self, token):
        """
        Merge a {{static-hash-url:*}} token and return the results.

        """

        url = token.value

        # get the full path on disk to the URL path
        static_url_abspath = self.get_static_url_abspath(token, url)

        # if we already generated this file, return the cached reference
        if static_url_abspath in static_hash_urls:
            return static_hash_urls[static_url_abspath]

        # make sure the file exists
        if not os.path.exists(static_url_abspath):
            self.line_error(token,
                            "file not found: '%s'" %
                            (os.path.join(STATIC_ROOT, url)))

        # make sure the path is actually a file
        if not os.path.isfile(static_url_abspath):
            self.line_error(token,
                            "path exists, but is not a file: '%s'" %
                            (os.path.join(STATIC_ROOT, url)))

        # get the destination directory we want to write the hash file into
        dest_dir = os.path.dirname(static_url_abspath)

        # get the destination for the relative URL directory
        dest_url_relative_dir = os.path.dirname(url)

        # get the source filename by itself
        src_base_filename = os.path.basename(static_url_abspath)

        # split the source filename into a base name and a file extension
        (basename, file_ext) = os.path.splitext(src_base_filename)

        # hash the file contents
        md5_hash = hashlib.md5()
        with open(static_url_abspath, "rb") as f:
            contents = f.read()
            md5_hash.update(contents)

        # construct the base hash filename
        hash_filename = md5_hash.hexdigest() + file_ext

        # construct the relative destination URL to the hashed file
        dest_hash_url = os.path.join(dest_url_relative_dir, hash_filename)

        # construct the absolute path to the hashed file
        dest_hash_abspath = os.path.join(dest_dir, hash_filename)

        # copy source file to dest hash file
        copy_file(static_url_abspath, dest_hash_abspath)

        # cache the results
        static_hash_urls[static_url_abspath] = dest_hash_url

        return dest_hash_url

    def merge_token_rss(self, token):
        """
        Merge an {{rss:*}} token and return the results.

        """

        # only support {{rss:url}}
        if "url" != token.value:
            self.line_error(token,
                            "invalid value for rss parameter: '%s'" %
                            (token.value))

        # make sure required headers are present
        if not self.node.has_header("rss-url"):
            self.line_error(token, "'RSS-URL' header not set")
        if not self.node.has_header("canonical-url"):
            self.line_error(token, "'Canonical-URL' header not set")

        # get relative RSS URL
        rss_url = self.node.get_header("rss-url")
        if rss_url.startswith("/"):
            rss_url = rss_url[1:]

        # get canonical RSS URL
        root_node = self.node.get_root_node()
        rss_canonical_url = root_node.get_canonical_url() + rss_url

        return rss_canonical_url

    def merge_token_node_body(self, token):
        """
        Merge a {{node:body}} token and return the results.

        """

        # if we're in a {{node:body}} loop, show an error
        if self.node_body_semaphore != 0:
            self.node_body_loop_error(token)

        # entering {{node:body}} merge
        self.node_body_semaphore += 1

        # if the format is set to text, add HTML line breaks
        if self.node.has_header("format"):
            node_format = self.node.get_header("format")
            if "text" == node_format:
                self.node.set_body(self.text_to_html(self.node.body))
            elif "html" == node_format:
                # default
                pass
            else:
                self.line_error(
                    Token("Format: %s" % (node_format)),
                    "unknown 'Format' header value: '%s'" % (node_format))

        # get node body
        body = self.node.get_body()

        # merge node body contents
        merged_body = self.merge_multiline(body)

        # leaving {{node:body}} merge
        self.node_body_semaphore -= 1

        return merged_body

    def merge_token_node(self, token):
        """
        Render a {{node:*}} token for the current node.

        Supported values are "url", "name", "title", "link" and "body".
        Any other value is reported as an error against the token.

        """

        value = token.value
        node = self.node

        if value == "url":
            return self.get_node_url(node)

        if value == "name":
            return escape(node.get_name())

        if value == "title":
            return node.get_escaped_title()

        if value == "link":
            return self.get_node_link(node)

        if value == "body":
            return self.merge_token_node_body(token)

        # none of the supported values matched
        self.line_error(token,
                        "invalid value for node parameter: '%s'" %
                        (value))

    def merge_token_node_url(self, token):
        """
        Look up the node named by a {{node-url:*}} token and return the
        URL for that node.

        A path that can not be resolved to a node is reported as an
        error against the token.

        """

        node_path = token.value

        try:
            found = self.get_node_by_path(token, node_path)
        except UrielError:
            self.line_error(token, "node not found: '%s'" % (node_path))

        return self.get_node_url(found)

    def merge_token_node_name(self, token):
        """
        Look up the node named by a {{node-name:*}} token and return the
        escaped name of that node.

        A path that can not be resolved to a node is reported as an
        error against the token.

        """

        node_path = token.value

        try:
            found = self.get_node_by_path(token, node_path)
        except UrielError:
            self.line_error(token, "node not found: '%s'" % (node_path))

        return escape(found.get_name())

    def merge_token_node_title(self, token):
        """
        Look up the node named by a {{node-title:*}} token and return the
        (possibly escaped) title of that node.

        A path that can not be resolved to a node is reported as an
        error against the token.

        """

        node_path = token.value

        try:
            found = self.get_node_by_path(token, node_path)
        except UrielError:
            self.line_error(token, "node not found: '%s'" % (node_path))

        return found.get_escaped_title()

    def merge_token_node_link(self, token):
        """
        Look up the node named by a {{node-link:*}} token and return a
        link to that node.

        A path that can not be resolved to a node is reported as an
        error against the token.

        """

        node_path = token.value

        try:
            found = self.get_node_by_path(token, node_path)
        except UrielError:
            self.line_error(token, "node not found: '%s'" % (node_path))

        return self.get_node_link(found)

    def merge_token_node_list(self, token):
        """
        Render a {{node-list:*}} token for the current node.

        The only supported value is "*", which returns the HTML fragment
        stored in the node's internal "__node-list-html" header. Any
        other value is reported as an error against the token.

        """

        if "*" != token.value:
            self.line_error(token,
                            "invalid value for node-list parameter: '%s'" %
                            (token.value))
            return None

        return self.get_maybe_canonical_html_fragment(
            self.node, "__node-list-html")

    def merge_token_tag_list(self, token):
        """
        Render a {{tag-list:*}} token for the current node.

        The only supported value is "*", which returns the HTML fragment
        stored in the node's internal "__tag-list-html" header. If that
        header is missing, or the value is anything other than "*", an
        error is reported against the token.

        """

        wants_all_tags = ("*" == token.value)

        if wants_all_tags and self.node.has_header("__tag-list-html"):
            return self.get_maybe_canonical_html_fragment(
                self.node, "__tag-list-html")

        if wants_all_tags:
            # __tag-list-html is an internal header, set elsewhere on
            # nodes that have tags defined. report the missing Tags
            # header directly instead of something obtuse.
            self.line_error(token, "'Tags' header not set on this node")

        self.line_error(token,
                        "invalid value for tag-list parameter: '%s'" %
                        (token.value))

    def merge_token_soju(self, token):
        """
        Run the user-defined soju code named by a {{soju:*}} token and
        return its result.

        Any exception raised by that code is logged against the token and
        re-raised as a chained SojuError carrying a one-line summary of
        the failed call. For exceptions other than SojuError, the part of
        the traceback starting at the first entry that mentions "soju.py"
        is logged as well, so the user sees their own code without the
        surrounding machinery of the program.

        """

        try:
            return self.get_soju_result(token.value)

        except Exception as exc:
            # describe the failure: the exception message wrapped in
            # quotes when there is one, otherwise (without quotes)
            # whatever get_exception_reason() reports for the exception
            reason = str(exc)
            if reason == "":
                reason = get_exception_reason(exc)
            else:
                reason = "'" + reason + "'"

            # log the error within the context of how we got here
            self.line_error(token, reason, False)

            # a SojuError is summarized only. anything else is unexpected,
            # so also show the user-facing subset of the stack trace
            if not isinstance(exc, SojuError):
                # keep everything from the first traceback entry that
                # mentions soju.py onwards
                user_frames = []
                for frame in traceback.format_tb(exc.__traceback__):
                    if user_frames or "soju.py" in frame:
                        user_frames.append(frame.rstrip())

                if user_frames:
                    log("Traceback (most recent call last):")
                    for frame in user_frames:
                        log(frame)

            # raise a new chained exception that tells the user where to
            # start debugging
            summary_error = "error in function call to 'soju." + \
                            token.value + "': " + reason
            raise SojuError(summary_error) from exc

    def merge_token(self, token):
        """
        Render a single token and return the resulting text.

        The token type selects which merge_token_*() method does the
        work. A token whose type matches none of the known types is
        reported as an error.

        """

        # token type -> method that renders it, checked in this order
        handlers = (
            (Token.LITERAL, self.merge_token_literal),
            (Token.VALUE, self.merge_token_value),
            (Token.VALUE_UNESCAPED, self.merge_token_value_unescaped),
            (Token.INCLUDE, self.merge_token_include),
            (Token.CREATED, self.merge_token_created),
            (Token.MODIFIED, self.merge_token_modified),
            (Token.BREADCRUMBS, self.merge_token_breadcrumbs),
            (Token.STATIC_URL, self.merge_token_static_url),
            (Token.STATIC_HASH_URL, self.merge_token_static_hash_url),
            (Token.RSS, self.merge_token_rss),
            (Token.NODE, self.merge_token_node),
            (Token.NODE_URL, self.merge_token_node_url),
            (Token.NODE_NAME, self.merge_token_node_name),
            (Token.NODE_TITLE, self.merge_token_node_title),
            (Token.NODE_LINK, self.merge_token_node_link),
            (Token.NODE_LIST, self.merge_token_node_list),
            (Token.TAG_LIST, self.merge_token_tag_list),
            (Token.SOJU, self.merge_token_soju),
        )

        for token_type, handler in handlers:
            if token_type == token.type:
                return handler(token)

        # unknown token type
        self.line_error(
            token,
            "invalid parameter or value: '%s'" %
            (token.original_string))

    def merge_line(self, line):
        """
        Merge the substitution parameters found in one line of text.

        Accepts a single line of text, or None.

        None is returned unchanged, and so is any line that does not
        contain both "{{" and "}}". Otherwise the line is tokenized, each
        token is rendered (which may pull in whole included templates,
        recursively), and the rendered pieces are joined together.

        Returns the (possibly multiline) string of rendered text.

        """

        if line is None:
            return line

        # without at least one {{ and one }} there is nothing to
        # substitute, so the line is a literal
        if "{{" not in line or "}}" not in line:
            return line

        # render each token in order and glue the results back together
        return "".join(
            [self.merge_token(piece) for piece in self.tokenize(line)])

    def merge_lines(self, lines):
        """
        Merge a list of lines, one at a time.

        Accepts a list of strings. Trailing whitespace is stripped from
        each line before it is merged.

        Returns the merged lines joined by newlines as a single string.

        """

        return "\n".join(
            [self.merge_line(raw_line.rstrip()) for raw_line in lines])

    def merge_multiline(self, multiline):
        """
        Merge a multi-line string.

        The string is split on newlines and handed to merge_lines().

        Returns a multi-line string with merged values.

        """

        return self.merge_lines(multiline.split("\n"))

    def merge_template(self, template):
        """
        Merge a template with the Node values.

        Accepts the name of a template, relative to the templates
        directory under the project root.

        As a special case, when the requested template is "null" and no
        template file with that name exists, a minimal built-in template
        containing only {{node:body}} is merged instead.

        Reports a "template not found" error if the template file does
        not exist.

        Returns the rendered output.

        """

        # get the path to the template file
        template_file = os.path.join(self.project_root,
                                     TEMPLATES_ROOT,
                                     template)

        # if the requested template is "null", and we don't actually have
        # a template file named "null", return a minimal template that
        # just includes the node contents, but allows for parameter
        # substitution.
        #
        # N.B. this looks at the template being merged, not at the page's
        #      own template, so that a missing template requested from a
        #      page with a null template still gets a "template not found"
        #      error below.
        if "null" == template and not os.path.exists(template_file):
            return self.merge_lines(["{{node:body}}"])

        # read all of the lines from the template
        lines = []
        try:
            with open(template_file) as f:
                lines = f.readlines()

        except FileNotFoundError:
            # the code that validates {{include:*}} parameters from other
            # templates should have already verified that the file exists.
            # if it fails now, we probably didn't ever have a template at all
            token = Token("Template: " + template)
            self.line_error(token,
                            "template not found: '" + template + "'")

        # add the template to the stack, along with the node path and the
        # current {{node:body}} nesting level
        self.template_stack.push(template,
                                 self.node.get_path(),
                                 self.node_body_semaphore)

        # get the result as a string with the template and node merged together
        result = self.merge_lines(lines)

        # remove the current template from the stack
        self.template_stack.pop()

        # return the result
        return result

    def render(self):
        """
        Render this Page by merging its template with its Node.

        Returns the rendered text. Non-empty output always ends with a
        newline; empty output is returned as is.

        """

        # start from the template this page was set up with
        output = self.merge_template(self.template)

        # terminate non-empty output with a newline if it lacks one
        if output != "" and output[-1] != "\n":
            output += "\n"

        return output

    def __str__(self):
        return str(self.node) + " [" + self.template + "]"


class Node:
    """
    Represents a dynamic page, either virtual or backed by a file.

    This class is intended to be treated as abstract, and not
    instantiated directly.

    """

    def __init__(self, project_root, path, parent_node=None):
        """
        Accepts the project root directory.

        Accepts the path (as a subdirectory of the node root).

        Optionally accepts a parent Node instance.

        """

        self.project_root = project_root
        self.nodes_root = os.path.join(project_root, NODES_ROOT)
        self.path = path
        self.parent_node = parent_node
        self.body = None
        self.rendered_body = None
        self.created = None
        self.modified = None

        # list of child Node instances
        self.children = []
        self.child_nodes_presorted = False

        # header -> value
        self.headers = {}

        # tag -> set of Node matching that tag
        self.tag_node_index = None

        #
        # cache performance optimizations
        #
        # url
        self.url_cache = None
        # title
        self.title_cache = None
        # name
        self.name_cache = None
        # node path cache (root node only)
        # path -> Node
        self.node_path_cache = None
        if parent_node is None:
            self.node_path_cache = {}
        #
        # tag node cache
        self.tag_node_cache = None
        #
        # root node cache
        self.root_node_cache = self
        if parent_node is not None:
            self.root_node_cache = parent_node.root_node_cache
        else:
            self.root_node_cache.node_path_cache["index"] = self
        #
        # sorted list of tags
        self.sorted_tags_cache = None
        #
        # map of tag -> Node representing that tag (e.g. /tag/<SOME_TAG>)
        self.tag_vnode_cache = None
        if parent_node is None:
            self.tag_vnode_cache = {}

        # check for duplicate node path elsewhere in the tree
        if parent_node is not None:
            root_node = parent_node.get_root_node()
            dupe = root_node.find_node_by_path(path, False)
            if dupe is not None:
                err = "can not create node, another node already exists " + \
                      "with this path: '%s'"
                raise Exception(err % (path))

        # import headers from parent node
        if self.parent_node:
            # copy parent headers into this node
            for (key, value) in self.parent_node.get_header_key_values():
                self.set_header(key, value)

            # N.B. the +/- keys are sorted and reversed, so that it is possible
            #      to push inheritance out multiple levels. this works because
            #      when sorted and reversed, ++foo sorts after +foo, and so on,
            #      which prevents ++foo from overwriting +foo before it gets
            #      a chance to be set.

            # +
            for key in reversed(self.get_header_keys()):
                if key.startswith("+"):
                    plus_key = key
                    stripped_key = key[1:]
                    value = self.get_header(plus_key)
                    self.set_header(stripped_key, value)
                    self.delete_header(plus_key)

            # -
            for key in reversed(self.get_header_keys()):
                if key.startswith("-"):
                    minus_key = key
                    stripped_key = key[1:]
                    value = self.get_header(minus_key)
                    if "*" != value:
                        err = "key '%s' can not have value '%s' " + \
                              "in node '%s' (value must be '*')"
                        raise UrielError(err % (key, value, self.get_path()))
                    if self.has_header(stripped_key):
                        self.delete_header(stripped_key)
                    self.delete_header(minus_key)

        # disable inheritance on fields that should be unique

        # Title
        if self.has_header("title"):
            self.delete_header("title")

        # Created
        elif self.has_header("created"):
            self.delete_header("created")

        # Modified
        elif self.has_header("modified"):
            self.delete_header("modified")

    def get_parent_node(self):
        """
        Get the parent Node, or None if this is the root.

        """

        return self.parent_node

    def get_path(self):
        """
        Get the node path.

        """

        return self.path

    def get_node_type(self):
        """
        Get the Node type as a string (e.g. "file", "virtual").

        Subclasses need to implement this, and return a string describing
        the type of node they represent.

        """

        raise Exception("not implemented in abstract parent class")

    def add_child(self, node):
        """
        Add the given Node as a child of this Node.

        """

        # add the child node
        self.children.append(node)

        # invalidate the pre-sorted flag
        self.child_nodes_presorted = False

        # update the node path cache on the root node
        self.root_node_cache.node_path_cache[node.get_path()] = node

    def get_children(self):
        """
        Get a list of child Node entries immediately under this Node.

        """

        # if the child nodes have not already been pre-sorted,
        # sort them now, in place, and remember that we did
        if not self.child_nodes_presorted:
            self.children = sorted(self.children)
            self.child_nodes_presorted = True

        # return the (pre-sorted) child nodes
        return self.children

    def get_url(self):
        """
        Get the URL path for this Node.

        URL paths are relative to the root of the site.

        Example URL paths:
            /
            /foo/
            /foo/bar/

        This method is the authoritative source of where a node will be
        placed in the rendered files.

        """

        # cache hit
        if self.url_cache is not None:
            return self.url_cache

        # set node_dir to the directory we want to use in our URL
        # if it's an index node, strip off the "index" part
        if self.get_path().endswith(NODE_INDEX):
            end_index = (len(NODE_INDEX) * -1) -1
            node_dir = self.get_path()[:end_index]
        else:
            node_dir = self.get_path()

        # if this is the root node, return a /
        if "" == node_dir:
            self.url_cache = "/"
            return self.url_cache

        # preserve node hierarchy by default
        target_dir = node_dir

        # flat URL (e.g. just the leaf node name, or directory if index)
        flat_url = node_dir.split("/")[-1:][0]

        # optionally flatten the URL to only the most specific node directory
        if self.get_boolean_header_value("flat-url", False):
            target_dir = flat_url

        # if this node doesn't have a flat URL, one of its parents might
        elif self.get_parent_node() is not None:
            # get the parent URL (without the slash)
            parent_url = self.get_parent_node().get_url()[1:]

            # set our target dir to the parent URL plus the flat URL
            target_dir = parent_url + flat_url

        # return the target dir, with a leading and trailing slash
        self.url_cache = "/" + target_dir + "/"
        return self.url_cache

    def get_canonical_url(self):
        """
        Get the canonical URL for this Node.

        """

        if not self.has_header("canonical-url"):
            raise UrielError(
                "Canonical-URL not set, but required by node '%s'" % \
                (self.get_path()))

        return self.get_header("canonical-url") + self.get_url()

    def get_name(self):
        """
        Get the name of this Node.

        This is generally the most specific dirent from the URL.

        As a special case, the name of the top-level index node is "index".

        """

        if self.name_cache is not None:
            return self.name_cache

        url = self.get_url()

        leaf = "index"

        url_parts = url.split("/")
        for part in url_parts:
            if "" != part:
                leaf = part

        self.name_cache = leaf

        return self.name_cache

    def get_display_name(self):
        """
        Get the display name of the node.

        This is constructed from the base node name, but is formatted to be
        more human readable.

        It is used as a fallback plan in case a Title header is not set.

        """

        # get the node name
        name = self.get_name()

        # we're going to split the node name into a bunch of parts,
        # and recombine them separated by spaces. we split on several
        # different characters, and subdivide the parts further and further.
        # to start, put the entire node name in the parts list as one element.
        parts = [name]

        # go through each single-character pattern we're splitting on
        for pattern in "-_ ":
            # temporarily save the parts that come out of the split
            # for this one character
            future_parts = []

            # go through all of the parts
            for part in parts:
                # further split this part on the current pattern
                tmp_parts = part.split(pattern)

                # for each part that came out of this split,
                # add them to what will become the new parts list
                for tmp_part in tmp_parts:
                    # by default, we pass the part straight through
                    future_part = tmp_part

                    # if this part doesn't have any capital letters,
                    # give it initial caps
                    if not re.match(r"[A-Z]", future_part):
                        future_part = future_part.capitalize()

                    # add it to the parts list
                    future_parts.append(future_part)

            # replace the higher-level parts list with the local one,
            # which is now further subdivided
            parts = future_parts

        # return all of the subdivided parts, separated by spaces
        return " ".join(parts)

    def get_title(self):
        """
        Get the title of the node, as set by the Title header.

        If that doesn't work, fall back on using the node display name.

        """

        if self.title_cache is not None:
            return self.title_cache

        # use the Title header, if we have it
        if self.has_header("title"):
            self.title_cache = self.get_header("title")
            return self.title_cache

        # otherwise fall back on the node name
        self.title_cache = self.get_display_name()
        return self.title_cache

    def get_escaped_title(self):
        """
        Get the escaped title of the node, as set by the Title header.

        If the Escape-Title: false header is set, return the Title
        without escaping.

        If that doesn't work, fall back on using the node display name.

        """

        if self.get_boolean_header_value("escape-title", True):
            return escape(self.get_title())

        return self.get_title()

    def get_link(self):
        """
        Get a link to this node, using its title as the link text.

        """

        return "<a href=\"" + self.get_url() + "\">" + \
               self.get_escaped_title() + "</a>"

    def get_canonical_link(self):
        """
        Get a canonical link to this node, using its title as the link text.

        """

        return "<a href=\"" + self.get_canonical_url() + "\">" + \
               self.get_escaped_title() + "</a>"

    def get_link_prefix(self):
        """
        Get the HTML prefix to use in generated lists of links.

        """

        if self.has_header("link-prefix"):
            return self.get_header("link-prefix")

        return "<p>"

    def get_link_suffix(self):
        """
        Get the HTML suffix to use in generated lists of links.

        """

        if self.has_header("link-suffix"):
            return self.get_header("link-suffix")

        return "</p>"

    def get_tags(self):
        """
        Get the set of tags associated with this Node.

        Returns a set of tags.

        """

        # return from cache if possible
        if self.sorted_tags_cache is not None:
            return self.sorted_tags_cache

        tag_set = set()

        if self.has_header("tags"):
            tag_list = self.get_header("tags").split(",")
            for tag in tag_list:
                if "" != tag:
                    tag = tag.strip()

                    # validate tag
                    if not re.match(r"^[a-z0-9\-]*$", tag):
                        raise UrielError(
                            "invalid tag '%s' in node '%s'" % \
                            (tag, self.get_path()))

                    tag_set.add(tag)

        self.sorted_tags_cache = sorted(tag_set)

        return self.sorted_tags_cache

    def get_dest_dir(self):
        """
        Get the destination directory in the public website that will
        contain this page.

        """

        # get the URL path
        url = self.get_url()

        # root index node
        if "/" == url:
            return os.path.join(self.project_root, PUBLIC_ROOT)

        # strip leading and trailing slashes from URL
        target_url = url[1:-1]

        return os.path.join(self.project_root, PUBLIC_ROOT, target_url)

    def get_dest_file(self):
        """
        Get the destination file in the public website that will
        contain this page.

        """

        return os.path.join(self.get_dest_dir(), HTML_INDEX)

    def get_boolean_header_value(self, header, default):
        """
        Get the boolean value for a header as True or False.

        If the header is not set, return the provided default value.

        Raises an UrielError if any other value is found.

        """

        # canonicalize the header name to lowercase
        lc_header = header.lower()

        if self.has_header(lc_header):
            value = self.get_header(lc_header)
            if "true" == value:
                return True
            elif "false" == value:
                return False
            else:
                err = "invalid value for '%s' " + \
                      "header in node '%s': '%s'"
                raise UrielError(err % (header, self.get_path(), value))

        return default

    def get_breadcrumb_separator(self):
        """
        Get the breadcrumb separator for this node.

        """

        separator = "&raquo;"
        maybe_space = " "

        # look at headers to see if we should change the separator
        if self.has_header("breadcrumb-separator"):
            separator = self.get_header("breadcrumb-separator")

        # should we surround the separator with spaces?
        if not self.get_boolean_header_value("breadcrumb-separator-spaces",
                                             True):
            maybe_space = ""

        return maybe_space + separator + maybe_space

    def find_node_by_path(self, path, raise_exceptions=True):
        """
        Find the Node that matches the given path.

        Returns the given node.

        Raises a UrielError if the node can not be found.

        If the optional raise_exceptions parameter is set to False,
        then instead of raising an exception, it will return None instead.

        """

        # if this node matches, return it
        if self.get_path() == path:
            return self

        # return the path from the cache
        # this should always be accurate, since the cache is built
        # as child nodes are added to the tree
        if self.root_node_cache:
            if path in self.root_node_cache.node_path_cache:
                return self.root_node_cache.node_path_cache[path]

        if raise_exceptions:
            raise UrielError("node not found: '%s'" % (path))

        return None

    def get_root_node(self):
        """
        Get the root Node, walking up the tree from whichever Node this is.

        """

        return self.root_node_cache

    def get_tag_node(self):
        """
        Get the tag node, by looking at the value of the Tag-Node header,
        and then returning the matching node.

        There is only one tag node, and the Tag-Node header should only be
        set on the root node.

        Returns the tag node, or None if the tag node is not configured.

        Raises a UrielError if the tag node is configured, but can not be
        found.

        """

        # return the tag node from the cache, if possible
        if self.tag_node_cache is not None:
            return self.tag_node_cache

        # find and return the tag node
        if self.has_header("tag-node"):
            # get the root node
            root = self.get_root_node()

            # find the tag node and cache it
            self.tag_node_cache = \
                root.find_node_by_path(self.get_header("tag-node"))

            # return the tag node
            return self.tag_node_cache

        return None

    def set_tag_node_index(self, tag_node_index):
        """
        Set (or replace) the tag node index on the root Node.

        """

        # find the root node
        root = self.get_root_node()

        # overwrite the tag node index on the root node
        root.tag_node_index = tag_node_index

    def get_tag_node_index(self):
        """
        Get the tag node index.

        The tag node index is a dict of tags, with sets of Node as values.

        Returns the tag node index, or raises a UrielError if the tag node
        index has not already been created (via create_tag_node_index()).

        """

        # whatever node this is, pull the tag index out of the root node
        tag_node_index = self.get_root_node().tag_node_index

        if tag_node_index is None:
            raise UrielError("tag node index not set")

        return tag_node_index

    def create_tag_node_index(self, node=None, tag_node_index=None):
        """
        Create tag / node index.

        The tag node index is a dict of tag -> set of node

        Optionally accepts a node, and a tag node index.

        Returns a tag to node index.

        """

        is_top_level_method_call = False

        # if we didn't have a Node passed in, start at the root
        if node is None:
            node = self.get_root_node()
            is_top_level_method_call = True

        # if we don't have a tag index, create it
        if tag_node_index is None:
            # tag -> set of Node matching that tag
            tag_node_index = {}

        # go through the tags for this node
        for tag in node.get_tags():
            # if this is a new tag we haven't seen before,
            # create a set for it and add it to the index
            if tag not in tag_node_index:
                tag_node_index[tag] = set()

            # add the node to the index for this tag
            tag_node_index[tag].add(node)

        # recurse
        for child in node.get_children():
            self.create_tag_node_index(child, tag_node_index)

        # if this is the top-level method call,
        # set the tag node index on the root node
        if is_top_level_method_call:
            self.set_tag_node_index(tag_node_index)

        return tag_node_index

    def get_vnode_for_tag(self, tag):
        """
        Get the virtual node that represents the given tag
        (e.g. the node for /tag/$SOME_TAG/)

        """

        # performance optimization:
        # check the global tag vnode cache first,
        # and just return the node from there if possible
        if tag in self.root_node_cache.tag_vnode_cache:
            return self.root_node_cache.tag_vnode_cache[tag]

        # search for the vnode by tag
        tag_root = self.get_tag_node()
        if tag_root is not None:
            for vnode in tag_root.get_children():
                if vnode.get_name() == tag:
                    # performance optimization:
                    # add the vnode to the tag vnode cache
                    self.root_node_cache.tag_vnode_cache[tag] = vnode

                    return vnode

        return None

    def get_body(self):
        """
        Get the unrendered Node body as a multi-line string.

        """

        return self.body

    def get_rendered_body(self):
        """
        Get the rendered Node body as a multi-line string.

        """

        return self.rendered_body

    def set_body(self, body):
        """
        Set the unrendered Node body. Accepts a multi-line string.

        """

        self.body = body

    def set_rendered_body(self, rendered_body):
        """
        Replace the contents of the Node body (e.g. with a rendered version).

        """

        self.rendered_body = rendered_body

    def has_header(self, header):
        """
        Does the given header exist on this node?

        Inside the program, headers should all be lowercase.

        """

        if header in self.headers:
            return True

        return False

    def get_header(self, header):
        """
        Get the value associated with the given header.

        Inside the program, headers should all be lowercase.

        """

        return self.headers[header]

    def invalidate_cache_by_header(self, header):
        """
        Invalidate the cache for certain performance optimizations.

        """

        if header:
            # headers affecting title_cache field
            if "title" == header:
                self.title_cache = None
            if "escape-title" == header:
                self.title_cache = None

            # headers affecting url_cache field
            if "flat-url" == header:
                self.url_cache = None

    def set_header(self, header, value):
        """
        Set the given header to the specified value.

        Inside the program, headers should all be lowercase.

        """

        self.invalidate_cache_by_header(header)

        self.headers[header] = value

    def delete_header(self, header):
        """
        Delete the given header.

        Inside the program, headers should all be lowercase.

        """

        self.invalidate_cache_by_header(header)

        del(self.headers[header])

    def get_header_keys(self):
        """
        Get each of the header keys.

        """

        return sorted(self.headers.keys())

    def get_header_key_values(self):
        """
        Get each key/value element of the headers as a series of two-element
        tuples.

        """

        return sorted(self.headers.items())

    def get_datetime_from_date_str(self, date_str):
        """
        Get a datetime.datetime instance from the provided date string.

        The date string must be in the ISO 8601 date format.

        """

        try:
            # turn the date string into a datetime instance
            dt = datetime.datetime.fromisoformat(date_str)

            # if the datetime instance doesn't have a time zone,
            # create a new datetime with the date/time we read from
            # the date string, augmented with the local time zone
            if dt.tzinfo is None:
                tmp_dt = datetime.datetime.fromtimestamp(
                    dt.timestamp(),
                    datetime.datetime.now(datetime.UTC).astimezone().tzinfo)

                dt = tmp_dt

            # return the datetime with time zone
            return dt

        except Exception as e:
            err = "invalid date header value in node " + \
                  "'%s': '%s'"
            raise UrielError(err % (self.get_path(), date_str))

    def __lt__(self, other):
        """
        Sort Node instances by:
            date descending (created if available, otherwise modified)
            title ascending
            URL ascending

        """

        # self and other candidate dates
        # created isn't always set, but modified is always available

        self_date = self.modified
        if self.created:
            self_date = self.created

        other_date = other.modified
        if other.created:
            other_date = other.created

        # sort by date
        if self_date != other_date:
            # try to use time zones
            if self_date.tzinfo and other_date.tzinfo:
                return self_date > other_date

            # fall back to comparing dates without time zones
            else:
                self_date_epoch_sec = int(self_date.strftime("%s"))
                other_date_epoch_sec = int(other_date.strftime("%s"))
                return self_date_epoch_sec > other_date_epoch_sec

        # then by title
        if self.get_title() != other.get_title():
            return self.get_title() < other.get_title()

        # and finally by URL (which is unique)
        return self.get_url() < other.get_url()

    def __str__(self):
        return self.get_path()


class FileNode(Node):
    """
    Represents a dynamic page backed by a node file.

    Contains headers and a body (format similar to HTTP, email, etc.)

    Headers are inherited from the parent node.

    Nodes have a parent/child tree relationship.

    """

    def __init__(self, project_root, path, parent_node=None):
        """
        Read the node file, and set the headers, body, and dates from it.

        Accepts the project root directory.

        Accepts the path to the node file (as a sub path of NODES_ROOT).

        Optionally accepts a parent Node instance.

        Raises a UrielError if a header starting with "-" has any value
        other than "*", or if a Created or Modified header does not hold
        a valid date.

        """

        # call the superconstructor
        super().__init__(project_root, path, parent_node=parent_node)

        # set the default date based on the node modified time
        node_path = os.path.join(self.nodes_root, self.get_path())
        node_modified = os.path.getmtime(node_path)
        self.modified = \
            datetime.datetime.fromtimestamp(node_modified,
                datetime.datetime.now(datetime.UTC).astimezone().tzinfo)

        # read dynamic node file, and parse out headers and body
        body_lines = []
        with open(os.path.join(self.nodes_root, path)) as f:
            parsing_headers = True

            # read each line in the node file
            for line in f:
                line = line.rstrip()

                # the first line without a ":" ends the headers
                # (this lets us have a body-only node file)
                if parsing_headers and ":" not in line:
                    parsing_headers = False

                    # a blank line could either be the separator between
                    # the headers and the body, or the first line of the
                    # body in a node without any headers at all.
                    # if we already had any headers before this, skip the
                    # blank separating line (if not, treat it as the first
                    # line of the page body)
                    if "" == line and len(self.headers) > 0:
                        continue

                # if we're parsing headers...
                if parsing_headers:
                    (header_key, header_value) = line.split(":", maxsplit=1)

                    # canonicalize header key to lowercase, and strip
                    # leading/trailing whitespace from key and value
                    self.set_header(header_key.strip().lower(),
                                    header_value.strip())

                # if we're parsing the body...
                else:
                    body_lines.append(line)

        # process "-" headers
        for key in self.get_header_keys():
            # if a header starts with a "-", and has a value of "*",
            # it's an instruction to remove that header from this Node
            # (e.g. -Foo: *)
            if not key.startswith("-"):
                continue

            # the key list is a snapshot; skip a "-" header that was
            # itself removed by an earlier "-" header
            if not self.has_header(key):
                continue

            # look at the value of this header (and not at whatever value
            # happened to be parsed last from the node file)
            value = self.get_header(key)

            if "*" != value:
                raise UrielError(
                    "invalid header in node '%s': '%s': '%s'" % \
                    (self.get_path(), key, value))

            # strip leading "-"
            remove_key = key[1:]

            # remove header
            if self.has_header(remove_key):
                self.delete_header(remove_key)

        # process other special headers

        # Created
        if self.has_header("created"):
            self.created = \
                self.get_datetime_from_date_str(self.get_header("created"))

        # Modified
        if self.has_header("modified"):
            self.modified = \
                self.get_datetime_from_date_str(self.get_header("modified"))

        # remember the body as a string
        self.body = "\n".join(body_lines)

    def get_node_type(self):
        """
        Returns the node type, which is always "file" for a FileNode.

        """

        return "file"


class VirtualNode(Node):
    """
    Represents a virtual node, not backed by a file.

    To use this class:
      - call set_body() to set the node body (which will be merged with
        template later)
      - set headers (although it inherits headers from the parent node)
      - add children (if necessary)

    """

    def __init__(self, project_root, path, parent_node=None):
        """
        Create the virtual node, and give it created and modified dates.

        Accepts the project root directory.

        Accepts the path (as a subdirectory of the node root).

        Optionally accepts a parent Node instance. When there is a parent,
        its dates are used; otherwise both dates are set to now.

        """

        # call the superconstructor
        super().__init__(project_root, path, parent_node=parent_node)

        # without a parent node, the dates default to now
        if not self.get_parent_node():
            # use the current date/time/timezone
            epoch_sec = datetime.datetime.now().timestamp()
            local_tz = datetime.datetime.now(datetime.UTC).astimezone().tzinfo
            now = datetime.datetime.fromtimestamp(epoch_sec, local_tz)

            self.created = now
            self.modified = now

            return

        # otherwise, take the dates from the parent node
        self.created = self.get_parent_node().created
        self.modified = self.get_parent_node().modified

    def get_node_type(self):
        """
        Returns the node type, which is always "virtual" for a VirtualNode.

        """

        return "virtual"


class FileWriter:
    """
    Writes an individual file to disk.

    Can be used directly, in which case close() must be called when done,
    or as a context manager, in which case the file is closed when the
    with block is left (also when it is left because of an exception).

    """

    def __init__(self, path, mode="w"):
        """
        Opens the file for writing.

        Accepts the path to the file, and an optional write mode.

        """

        self.path = path
        self.mode = mode
        self.f = open(path, mode)

    def __enter__(self):
        """
        Returns this FileWriter, for use in a with statement.

        """

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """
        Closes the file at the end of a with statement.

        Returns False, so that an exception raised inside the with block
        is not suppressed.

        """

        self.close()

        return False

    def write(self, content):
        """
        Writes the content to the file.

        """

        self.f.write(content)

    def close(self):
        """
        Flushes and closes the file.

        Calling this method again on an already closed file does nothing.

        """

        if self.f.closed:
            return

        # release the file handle even if the flush fails
        try:
            self.f.flush()
        finally:
            self.f.close()


def sys_exit(exit_code):
    """
    Terminate the program.

    Accepts the exit code to hand back to the caller of the program.

    """

    sys.exit(exit_code)

def log(s):
    """
    Write the given value to stderr as a single line, and flush stderr.

    Accepts a string (or anything that can be converted to a string).

    """

    line = str(s) + "\n"

    stream = sys.stderr
    stream.write(line)
    stream.flush()

def warn(s):
    """
    Log a warning to stderr.

    Accepts the warning message, which is logged with the program name
    in front of it.

    """

    log(": ".join((PROGRAM_NAME, str(s))))

def die(s):
    """
    Report a fatal error, and exit the program.

    Accepts the error message, which is logged to stderr in the same way
    as a warning, before exiting with the failure exit code.

    """

    # say what went wrong...
    warn(s)

    # ...and give up
    sys_exit(EXIT_FAIL)

def show_usage():
    """
    Log the command-line usage information for the program to stderr,
    and exit with the failure exit code.

    """

    usage_lines = (
        PROGRAM_NAME + ": Yet Another Static Site Generator",
        "Usage: " + PROGRAM_NAME + " <project-root>",
    )

    for usage_line in usage_lines:
        log(usage_line)

    sys_exit(EXIT_FAIL)

def printable_path(path):
    """
    Makes a filesystem path look a bit nicer in log messages.

    Accepts a file path

    Returns a nicer looking but equivalent file path

    A leading "./" is removed, runs of "/" are collapsed, and trailing "/"
    are removed. Leading dots that are part of a name are kept, so hidden
    files (".foo"), "." and ".." still point at the same place afterwards.

    """

    # special case, if we were given a /, just return it
    # (never mind that we should never be dealing with the root filesystem)
    if "/" == path:
        return path

    tmp = path

    # strip leading ./ (but only when the dot is followed by a slash,
    # so that .hidden and ../parent keep their meaning)
    # .///test///static/// -> test///static///
    tmp = re.sub(r'^\./+', r'', tmp)

    # collapse multiple / in a row
    # test///static/// -> test/static/
    tmp = re.sub(r'/{2,}', r'/', tmp)

    # strip trailing /
    # test/static/ -> test/static
    tmp = tmp.rstrip("/")

    # a path made of nothing but slashes is still the root directory
    if "" == tmp and path.startswith("/"):
        return "/"

    return tmp

def indent_spaces(indent):
    """
    Build the whitespace for the given level of indentation.

    Accepts a number of indentations.

    Returns a string of blank spaces, two for each indentation.

    """

    one_indent = "  "

    return indent * one_indent

def get_exception_reason(e):
    """
    Describe why an exception happened.

    Accepts an Exception (or a subclass of Exception)

    Returns the exception message, or the name of the exception class
    when the exception has no message, so the result is never blank.

    """

    return str(e) or e.__class__.__name__

def copy_file(src, dest):
    """
    Copy the source file to the destination file.

    Accepts the source path and the destination path.

    Symlinks are not followed: if the source is a symlink, a symlink with
    the same target is created at the destination. Anything that is
    already at the destination is replaced, unless it is a directory.

    Raises a UrielError if an error occurs.

    """

    try:
        # if either the source or dest file is a symlink, delete the
        # destination file first: a symlink can't be created on top of an
        # existing path, and copying a file onto an existing symlink would
        # write through the link.
        # there is nothing to delete if the destination doesn't exist yet,
        # and a real directory is left for the check below to report.
        if os.path.islink(src) or os.path.islink(dest):
            dest_is_real_dir = \
                os.path.isdir(dest) and not os.path.islink(dest)

            if os.path.lexists(dest) and not dest_is_real_dir:
                os.unlink(dest)

        # if the destination path is a directory, we don't want
        # to copy the source file into a directory with the same name
        if os.path.isdir(dest):
            raise UrielError(
                "error copying '%s' to '%s': destination path is a directory" %
                (src, dest))

        # copy the file (or create a symlink)
        shutil.copy2(src, dest, follow_symlinks=False)

    except UrielError:
        raise

    except FileNotFoundError as e:
        raise UrielError("error copying '%s' to '%s': file not found" %
                         (src, dest)) from e

    except PermissionError as e:
        raise UrielError("error copying '%s' to '%s': permission denied" %
                         (src, dest)) from e

    except IsADirectoryError as e:
        # was the source path a directory?
        if os.path.isdir(src):
            raise UrielError(
                "error copying '%s' to '%s': source path is a directory" %
                (src, dest)) from e

        # was the destination path a directory?
        if os.path.isdir(dest):
            raise UrielError(
                "error copying '%s' to '%s': destination path is a directory" %
                (src, dest)) from e

        # in case the checks above both fail
        raise UrielError(
            "error copying '%s' to '%s': '%s'" %
            (src, dest, get_exception_reason(e))) from e

    except Exception as e:
        raise UrielError(
            "error copying '%s' to '%s': '%s'" %
            (src, dest, get_exception_reason(e))) from e

def copy_file_if_different(src, dest):
    """
    Copy the source file to the destination file, unless the destination
    already looks the same as the source.

    Accepts the source path and the destination path.

    The files are considered to be the same if they agree on whether they
    are symlinks, and have the same size, permissions, and modification
    time. Symlinks are compared as links (not by what they point at),
    which matches the way copy_file() copies them.

    Raises a UrielError if the source does not exist, or is not a file or
    a symlink.

    """

    # if the source file doesn't exist, or is a directory, raise an error
    if (not os.path.isfile(src)) and (not os.path.islink(src)):
        if not os.path.exists(src):
            raise UrielError("source file does not exist: '%s'" % (src))
        else:
            raise UrielError("source file can not be a directory: '%s'" % (src))

    # if the destination file doesn't exist, just copy src to dest
    # (a dangling symlink still counts as an existing destination)
    if not os.path.lexists(dest):
        copy_file(src, dest)
        return

    # we have both source and destination files
    # decide whether we really want to expend the energy copying the file
    # (lstat, so that a symlink is judged by the link itself, and a
    # dangling symlink can be examined at all)
    src_stat = os.lstat(src)
    dest_stat = os.lstat(dest)

    # try to skip copying the file, if we can get away with it
    skip_copy = (
        os.path.islink(src) == os.path.islink(dest)
        and src_stat.st_size == dest_stat.st_size
        and src_stat.st_mode == dest_stat.st_mode
        and src_stat.st_mtime == dest_stat.st_mtime
    )

    # if anything is different, copy the file
    if not skip_copy:
        copy_file(src, dest)

def copy_files_recursive(src, dest):
    """
    Recursively copy the source directory into the destination directory.

    Accepts the source path and the destination path. If the source path
    is not a directory, it is copied as a single file.

    Files are handed to copy_file_if_different(), and entries that only
    exist in the destination are left alone.

    The permissions and timestamps of each source directory are applied
    to the matching destination directory.

    Raises a UrielError if an error occurs.

    """

    try:
        # if we got called with files as arguments,
        # just copy the source file to the dest file
        if not os.path.isdir(src):
            copy_file_if_different(src, dest)
            return

        # if the destination path is a symlink or a file, remove it
        if os.path.islink(dest) or \
           (os.path.exists(dest) and not os.path.isdir(dest)):
            os.unlink(dest)

        # create the destination directory, unless it is already there
        if not os.path.isdir(dest):
            os.mkdir(dest)

        # get the permissions and timestamps from src
        st = os.stat(src)

        # make sure the owner can put things into dest while we work on
        # it, even if the source directory itself is read-only
        os.chmod(dest, st.st_mode | 0o700)

        # go through the files and directories in the source
        for dirent in os.listdir(src):
            # calculate subdir paths
            src_path = os.path.join(src, dirent)
            dest_path = os.path.join(dest, dirent)

            # if this is a directory, recurse
            if os.path.isdir(src_path):
                copy_files_recursive(src_path, dest_path)

            # if this is a file, copy it from src to dest
            else:
                copy_file_if_different(src_path, dest_path)

        # apply the permissions and timestamps from src to dest.
        # this has to happen last, because adding entries to dest
        # changes its modification time
        os.chmod(dest, st.st_mode)
        os.utime(dest, times=(st.st_atime, st.st_mtime))

    # errors that already explain what went wrong (e.g. from deeper in
    # the tree) are passed along as they are, instead of being wrapped
    # again at every level
    except UrielError:
        raise

    except Exception as e:
        raise UrielError("error copying '%s' to '%s': '%s'" %
                         (src, dest, get_exception_reason(e))) from e

def copy_files_recursive_overwrite(src, dest):
    """
    Recursively copy the source directory into the destination directory,
    deleting anything in the destination directory that is not present
    in the source directory.

    Accepts the source path and the destination path. If the source path
    is not a directory, it is copied as a single file.

    The permissions and timestamps of each source directory are applied
    to the matching destination directory.

    Raises a UrielError if an error occurs.

    """

    try:
        # if we got called with files as arguments,
        # just copy the source file to the dest file
        if not os.path.isdir(src):
            copy_file_if_different(src, dest)
            return

        # if the destination path is a symlink or a file, remove it
        if os.path.islink(dest) or \
           (os.path.exists(dest) and not os.path.isdir(dest)):
            os.unlink(dest)

        # create the destination directory, unless it is already there
        if not os.path.isdir(dest):
            os.mkdir(dest)

        # get the permissions and timestamps from src
        st = os.stat(src)

        # make sure the owner can change the contents of dest while we
        # work on it, even if the source directory itself is read-only
        os.chmod(dest, st.st_mode | 0o700)

        # get the list of entries in the source directory
        # these will be copied over from src to dest
        src_copy_list = os.listdir(src)

        # entries in the destination directory that do not also appear
        # in src will be deleted from dest
        dest_delete_list = set(os.listdir(dest)).difference(src_copy_list)

        # copy everything on the copy list
        for dirent in src_copy_list:
            # calculate subdir paths
            src_path = os.path.join(src, dirent)
            dest_path = os.path.join(dest, dirent)

            # if this is a directory, recurse
            if os.path.isdir(src_path):
                copy_files_recursive_overwrite(src_path, dest_path)

            # if this is a file, copy it from src to dest
            else:
                copy_file_if_different(src_path, dest_path)

        # delete everything on the delete list
        for dirent in sorted(dest_delete_list):
            dest_delete_path = os.path.join(dest, dirent)

            # a real directory is deleted recursively
            # (a symlink to a directory is not; it is unlinked below)
            if os.path.isdir(dest_delete_path) and \
               not os.path.islink(dest_delete_path):
                shutil.rmtree(dest_delete_path)

            # files and symlinks are unlinked
            else:
                os.unlink(dest_delete_path)

        # apply the permissions and timestamps from src to dest.
        # this has to happen last, because adding and removing entries
        # in dest changes its modification time
        os.chmod(dest, st.st_mode)
        os.utime(dest, times=(st.st_atime, st.st_mtime))

    # errors that already explain what went wrong (e.g. from deeper in
    # the tree) are passed along as they are, instead of being wrapped
    # again at every level
    except UrielError:
        raise

    except Exception as e:
        raise UrielError("error copying '%s' to '%s': '%s'" %
                         (src, dest, get_exception_reason(e))) from e

def delete_directory_recursive(path):
    """
    Delete the given directory, and everything inside it.

    Accepts the path to the directory.

    Raises a UrielError if the directory can not be deleted.

    """

    try:
        shutil.rmtree(path)

    except Exception as e:
        reason = get_exception_reason(e)
        message = "could not delete directory '%s': '%s'" % (path, reason)

        raise UrielError(message)

def escape(text):
    """
    Escape text for use in HTML.

    Accepts unescaped text (or None).

    Returns the text as a string, with each character that appears in
    HTML_ESCAPE_MAP replaced by its escaped form. None results in an
    empty string.

    """

    if text is None:
        return ""

    return "".join(HTML_ESCAPE_MAP.get(c, c) for c in text)

def escape_xml(text):
    """
    Escape text for use inside of an XML CDATA section.

    The main purpose of this function is to make sure that no <![CDATA[ ]]>
    tags slip through and break the XML formatting in things like RSS feeds.

    Accepts a (possibly multi-line) string.

    Returns the string with the CDATA opening and closing markers escaped.

    """

    # neither marker contains a newline, so a marker can never span
    # lines, and the whole text can be handled in one go
    without_cdata_start = text.replace("<![CDATA[", "&lt;![CDATA[")
    without_cdata_end = without_cdata_start.replace("]]>", "]]&gt;")

    return without_cdata_end

def create_file_node_tree(project_root, parent_path=None, parent_node=None):
    """
    Create Node entries for all of the dynamic pages.

    Accepts project_root

    Optionally accepts the following arguments:
        parent_path - subdirectory of nodes_root containing this node
        parent_node - Node instance to use as the parent of discovered nodes

    Every directory is represented by the FileNode for its index file.
    The other files in the directory, and the index nodes of its
    subdirectories, are added to that node as children. Hidden files and
    emacs backup files are skipped.

    Returns the top Node in the tree (the top-level dynamic index page)

    """

    # there are a lot of dir/path variables in here.
    # here is what they all mean:
    #
    # project_root  top-level project root directory
    # parent_path   parent directory path above this part of the tree
    #               (without the NODES_ROOT container directory)
    # dir_path      project_root + NODES_ROOT + parent_path
    # dirent        directory entry in this directory (e.g. "foo")
    # dirent_path   project_root + NODES_ROOT + parent_path + dirent
    # node_path     parent_path + dirent

    # create the index node for this directory first
    # if this is not the root node, create the index for this subdirectory
    if parent_path:
        dir_path = os.path.join(project_root, NODES_ROOT, parent_path)
        index = FileNode(project_root,
                         os.path.join(parent_path, NODE_INDEX),
                         parent_node)

    # special case: create the root node
    else:
        dir_path = os.path.join(project_root, NODES_ROOT)
        index = FileNode(project_root, NODE_INDEX)

    # create all the rest of the nodes
    for dirent in sorted(os.listdir(dir_path)):
        # skip hidden files, and emacs backup files
        if dirent.startswith(".") or dirent.endswith("~"):
            continue

        # we already got the index file, don't get it again
        if NODE_INDEX == dirent:
            continue

        # get full path to child node
        dirent_path = os.path.join(dir_path, dirent)

        # create relative path from below NODES_ROOT
        if parent_path:
            node_path = os.path.join(parent_path, dirent)
        else:
            node_path = dirent

        # file: create node
        if os.path.isfile(dirent_path):
            index.add_child(FileNode(project_root, node_path, index))

        # directory: recurse
        elif os.path.isdir(dirent_path):
            index.add_child(
                create_file_node_tree(project_root, node_path, index))

    return index

def create_tag_node_tree(project_root, root, use_canonical_url):
    """
    Create the tag node tree.

    Accepts project_root directory, the root node, and a boolean indicating
    whether we should pre-compute canonical URLs.

    Asks the root node for the tag node, and does nothing if there isn't
    one.

    Otherwise, builds the index of tags and nodes, puts a list of links to
    all tags on the tag node, and adds one virtual child node per tag to
    the tag node, each with a list of links to the nodes carrying that tag.

    """

    # nothing to do without a tag node
    tag_node = root.get_tag_node()
    if tag_node is None:
        return

    # build tag node index
    root.create_tag_node_index()
    tag_node_index = root.get_tag_node_index()
    sorted_tags = sorted(tag_node_index.keys())

    # every generated link gets wrapped in the same prefix/suffix
    link_prefix = tag_node.get_link_prefix()
    link_suffix = tag_node.get_link_suffix()

    ############
    # TAG ROOT #
    ############

    # __Tag-List-HTML / __Tag-List-HTML-Canonical
    # one link per tag, pointing at the child vnode for that tag
    root_links = []
    root_canonical_links = []
    for tag in sorted_tags:
        escaped_tag = escape(tag)

        root_links.append("".join([
            link_prefix,
            "<a href=\"",
            tag_node.get_url(), escaped_tag, "/", "\">",
            escaped_tag, "</a>",
            link_suffix,
        ]))

        if use_canonical_url:
            root_canonical_links.append("".join([
                link_prefix,
                "<a href=\"",
                tag_node.get_canonical_url(), escaped_tag, "/", "\">",
                escaped_tag, "</a>",
                link_suffix,
            ]))

    tag_node.set_header("__tag-list-html", "\n".join(root_links))

    if use_canonical_url:
        tag_node.set_header("__tag-list-html-canonical",
                            "\n".join(root_canonical_links))

    # RSS-Include: false
    # (omit tag root and child vnodes from RSS feed)
    tag_node.set_header("rss-include", "false")

    ################
    # CHILD VNODES #
    ################

    # the vnodes live under the path of the tag node, minus the index
    # filename (and the slash in front of it) if the path ends with that
    vnode_base_path = tag_node.get_path()
    if vnode_base_path.endswith(NODE_INDEX):
        vnode_base_path = vnode_base_path[:-(len(NODE_INDEX) + 1)]

    # create child vnodes for each tag
    for tag in sorted_tags:
        vnode = VirtualNode(project_root,
                            vnode_base_path + "/" + tag,
                            tag_node)

        # the child vnode starts out with a copy of the tag node body
        vnode.set_body(tag_node.get_body())

        # Title
        vnode.set_header("title", vnode.get_name())

        # Flat-URL
        vnode.set_header("flat-url", "false")

        # __Tag-List-HTML / __Tag-List-HTML-Canonical
        # one link per node that carries this tag
        tagged_links = []
        tagged_canonical_links = []
        for tagged_node in sorted(tag_node_index[tag]):
            tagged_links.append(
                link_prefix + tagged_node.get_link() + link_suffix)

            if use_canonical_url:
                tagged_canonical_links.append(
                    link_prefix +
                    tagged_node.get_canonical_link() +
                    link_suffix)

        vnode.set_header("__tag-list-html", "\n".join(tagged_links))

        if use_canonical_url:
            vnode.set_header("__tag-list-html-canonical",
                             "\n".join(tagged_canonical_links))

        tag_node.add_child(vnode)

def create_tag_links(node, use_canonical_url):
    """
    Create the HTML that links to each of the tags of the given node.

    Accepts a Node, and a boolean indicating whether the links should use
    canonical URLs.

    Returns an HTML fragment with one link per tag, in sorted tag order,
    separated by commas (an empty string for a node without tags).

    """

    links = []

    # go through all of the tags for this node
    for tag in sorted(node.get_tags()):
        # each tag links to the vnode for that tag
        tag_vnode = node.get_vnode_for_tag(tag)

        url = tag_vnode.get_canonical_url() if use_canonical_url \
            else tag_vnode.get_url()

        links.append("<a href=\"" + url + "\">" + escape(tag) + "</a>")

    # separate tags with commas
    return ", ".join(links)

def create_tag_links_recursive(node, use_canonical_url):
    """
    Walk through the node tree, starting at the given node, and create tag
    links for each node that does not already have something for the
    {{tag-list:*}} parameter.

    Accepts a Node, and a boolean indicating whether we should pre-compute
    canonical URL links.

    """

    # the headers to fill in, and whether each one uses canonical URLs
    wanted_headers = [("__tag-list-html", False)]
    if use_canonical_url:
        wanted_headers.append(("__tag-list-html-canonical", True))

    # only fill in the headers that aren't already there
    for header, canonical in wanted_headers:
        if not node.has_header(header):
            node.set_header(header, create_tag_links(node, canonical))

    # recurse
    for child in node.get_children():
        create_tag_links_recursive(child, use_canonical_url)

def create_child_node_list_html(node, use_canonical_url):
    """
    Store an HTML fragment of links to the children of the given Node in
    the node's "__node-list-html" header, then do the same for every
    descendant.

    Each child link is wrapped in the parent's link prefix and suffix, and
    the links are joined with newlines. When use_canonical_url is true, a
    second fragment built from the canonical links is stored in the
    "__node-list-html-canonical" header.

    Accepts a Node, and a boolean indicating whether we should pre-compute
    canonical URL links.

    """

    plain = []
    canonical = []

    for child in node.get_children():
        # the wrapper text comes from the parent node, not the child
        before = node.get_link_prefix()
        after = node.get_link_suffix()

        plain.append(before + child.get_link() + after)

        if use_canonical_url:
            canonical.append(before + child.get_canonical_link() + after)

    # publish the fragment(s) on the parent node
    node.set_header("__node-list-html", "\n".join(plain))
    if use_canonical_url:
        node.set_header("__node-list-html-canonical", "\n".join(canonical))

    # repeat for each child
    for child in node.get_children():
        create_child_node_list_html(child, use_canonical_url)

def augment_node_tree(project_root, root_node):
    """
    Augment the node tree with additional computed values before rendering.

    Accepts the project root directory, and the root Node.

    If the root node reports a tag node, the tag node tree and the per-node
    tag links are created. The child node list HTML fragments are always
    created. Canonical variants are pre-computed only when the root node
    has a Canonical-URL header.

    """

    # check to see if we should pre-compute canonical URL links
    use_canonical_url = bool(root_node.has_header("canonical-url"))

    # create tag-related things, if Tag-Node is set
    tag_node = root_node.get_tag_node()
    if tag_node is not None:
        # if the tag node has neither an explicit created nor modified
        # header, default both times to now, because tags are dynamically
        # generated anyway
        has_explicit_time = \
            tag_node.has_header("created") or tag_node.has_header("modified")
        if not has_explicit_time:
            # timezone-aware "now" in the local timezone
            #
            # (a single clock read, and no reliance on datetime.UTC,
            # which only exists in Python 3.11 and later)
            now = datetime.datetime.now().astimezone()

            tag_node.created = now
            tag_node.modified = now

        # create tag nodes
        create_tag_node_tree(project_root, root_node, use_canonical_url)

        # create links to the tag pages, on all the nodes that have tags
        create_tag_links_recursive(root_node, use_canonical_url)

    # create {{node-list:*}} HTML fragment for each node
    create_child_node_list_html(root_node, use_canonical_url)

def render_node_tree(project_root, node):
    """
    Render the given Node and all of its descendants, storing each
    rendered result on the node itself.

    Accepts the project root directory, and the Node to start from.

    """

    # render this node through a Page, and keep the result on the node
    node.set_rendered_body(Page(project_root, node).render())

    # then do the same for everything below it
    for child in node.get_children():
        render_node_tree(project_root, child)

def get_max_url_path_len(node, max_url_len=0, max_path_len=0):
    """
    Find the longest URL and the longest path in the tree rooted at the
    given node.

    The max_url_len and max_path_len arguments are the lengths found so
    far, and act as lower bounds on the result.

    Returns a two-element tuple: (max URL length, max path length).

    """

    # fold this node's own lengths into the running maximums
    longest_url = max(max_url_len, len(node.get_url()))
    longest_path = max(max_path_len, len(node.get_path()))

    # let each subtree raise the maximums further
    for child in node.get_children():
        (longest_url, longest_path) = \
            get_max_url_path_len(child, longest_url, longest_path)

    return (longest_url, longest_path)

def write_dynamic_nodes(project_root, node):
    """
    Write every rendered node to its file under PUBLIC_ROOT, logging a
    table of what was written.

    Accepts the project root directory, and the root Node of the tree.

    """

    # column widths for the log table; the minimums passed in (3 and 4)
    # match the lengths of the "url" and "node" column headings
    (url_width, path_width) = get_max_url_path_len(node, 3, 4)

    # horizontal rule for the table; the first column is 7 characters wide
    rule = "-+-".join(("-" * 7, "-" * path_width, "-" * url_width))

    public_dir = os.path.join(project_root, PUBLIC_ROOT)
    log("creating pages in '%s' from nodes and templates" % \
        (printable_path(public_dir)))

    # table heading
    log(rule)
    log("type    | " + "node".ljust(path_width) + " | " + "url")
    log(rule)

    # table rows: one per node written, with a fresh set used to detect
    # duplicate URLs across the whole tree
    write_nodes(project_root, node, url_width, path_width, set())

    log(rule)

def write_nodes(project_root, node, max_url_len, max_path_len, unique_urls):
    """
    Write the rendered body of the given node, and of all of its
    descendants, to their destination files.

    Accepts the following arguments:
        project_root    - project root directory (passed along unchanged)
        node            - Node to write, along with its descendants
        max_url_len     - widest URL in the tree (passed along unchanged)
        max_path_len    - widest node path, used to pad the log column
        unique_urls     - set of URLs written so far, updated in place

    Raises a UrielError if the node's URL has already been written.

    """

    dest_dir = node.get_dest_dir()
    dest_file = node.get_dest_file()

    # make sure the destination directory is there
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # one log row per node: type, node path, url
    #
    # the type column is padded to 7 characters
    # (node types are "file" or "virtual")
    row = (node.get_node_type().ljust(7),
           node.get_path().ljust(max_path_len),
           node.get_url())
    log("%s | %s | %s" % row)

    # refuse to write two nodes to the same URL
    url = node.get_url()
    if url in unique_urls:
        err = "duplicate node URL '%s' in node '%s'"
        raise UrielError(err % (url, node.get_path()))
    unique_urls.add(url)

    # write out the rendered body
    writer = FileWriter(dest_file)
    writer.write(node.get_rendered_body())
    writer.close()

    # continue with the children
    for child in node.get_children():
        write_nodes(project_root, child, max_url_len, max_path_len,
                    unique_urls)

def get_eligible_nodes(node, boolean_header, default, node_set):
    """
    Collect the nodes in a tree whose given boolean header is true.

    The result is accumulated into node_set; nothing is returned.

    Accepts the following arguments:
        node            - root node of the tree to search
        boolean_header  - name of the header to read as a boolean
        default         - boolean value to use when the header is not set
        node_set        - set that eligible nodes are added to

    """

    # is this node itself eligible?
    eligible = node.get_boolean_header_value(boolean_header, default)
    if eligible:
        node_set.add(node)

    # check everything below it
    for child in node.get_children():
        get_eligible_nodes(child, boolean_header, default, node_set)

def get_utc_offset():
    """
    Get the current local UTC offset as a three-element tuple of strings:

        (sign, hours, minutes)

    The sign is "+" or "-", and the hours and minutes are zero-padded to
    two digits. For example, if the local time is set to EST, this
    function will return:

        ("-", "05", "00")

    """

    # read the local time, and work out how far it is from UTC in seconds
    # by treating the same broken-down time as both local and UTC
    local = time.localtime()
    offset_sec = calendar.timegm(local) - \
                 calendar.timegm(time.gmtime(time.mktime(local)))

    # the sign is reported separately from the magnitude
    sign = "-" if offset_sec < 0 else "+"

    # whole hours, then whole minutes out of what is left
    # (any leftover seconds are dropped)
    (hours, leftover) = divmod(abs(offset_sec), 60 * 60)
    minutes = leftover // 60

    return (sign, "%02d" % (hours), "%02d" % (minutes))

def get_rfc_2822_date(dt):
    """
    Gets a date string in RFC 2822 format.

    Accepts a datetime.datetime instance.

    If the instance has no timezone, it is assumed to be in the local
    timezone, and the UTC offset in effect locally on that date is used
    (so a winter date formatted during the summer still gets the winter
    offset). If the instance has a timezone, its own UTC offset is used.

    Returns a date string, for example:

        Mon, 15 Jan 2024 10:30:00 -0500

    """

    date_format = "%a, %d %b %Y %H:%M:%S "

    # a naive datetime is taken to be local time: astimezone() attaches
    # the local timezone without changing the wall clock time
    if dt.utcoffset() is None:
        try:
            dt = dt.astimezone()
        except (OverflowError, OSError, ValueError):
            # the platform can't resolve local time for this date,
            # so fall back to the UTC offset that is in effect right now
            (sign, hh, mm) = get_utc_offset()
            return dt.strftime(date_format) + sign + hh + mm

    # split the offset into a sign, and whole hours and minutes
    offset = dt.utcoffset()
    sign = "-" if offset < datetime.timedelta(0) else "+"
    (hours, minutes) = \
        divmod(abs(offset) // datetime.timedelta(minutes=1), 60)

    return dt.strftime(date_format) + \
        "%s%02d%02d" % (sign, hours, minutes)

def get_w3c_datetime(dt):
    """
    Gets a date string in W3C Datetime format.

    Accepts a datetime.datetime instance.

    If the instance has no timezone, it is assumed to be in the local
    timezone, and the UTC offset in effect locally on that date is used
    (so a winter date formatted during the summer still gets the winter
    offset). If the instance has a timezone, its own UTC offset is used.

    Returns a date string, for example:

        2024-01-15T10:30:00-05:00

    """

    date_format = "%Y-%m-%dT%H:%M:%S"

    # a naive datetime is taken to be local time: astimezone() attaches
    # the local timezone without changing the wall clock time
    if dt.utcoffset() is None:
        try:
            dt = dt.astimezone()
        except (OverflowError, OSError, ValueError):
            # the platform can't resolve local time for this date,
            # so fall back to the UTC offset that is in effect right now
            (sign, hh, mm) = get_utc_offset()
            return dt.strftime(date_format) + sign + hh + ":" + mm

    # split the offset into a sign, and whole hours and minutes
    offset = dt.utcoffset()
    sign = "-" if offset < datetime.timedelta(0) else "+"
    (hours, minutes) = \
        divmod(abs(offset) // datetime.timedelta(minutes=1), 60)

    return dt.strftime(date_format) + \
        "%s%02d:%02d" % (sign, hours, minutes)

def get_rss_url(root_node):
    """
    Get the value of the RSS-URL header from the root node, with one
    leading slash removed (if present).

    Returns None if the root node has no RSS-URL header.

    Raises a UrielError if the value contains "../".

    """

    # RSS is opt-in
    if not root_node.has_header("rss-url"):
        return None

    rss_url = root_node.get_header("rss-url")

    # the URL is later joined onto the destination directory,
    # so don't let it climb back out
    if "../" in rss_url:
        raise UrielError("RSS-URL can not contain ../")

    # drop a single leading slash
    return rss_url[1:] if rss_url.startswith("/") else rss_url

def write_rss(project_root, root_node):
    """
    Write RSS feed to disk (if enabled).

    Accepts the project root, and the root Node.

    Does nothing if RSS-URL is not set on the root node, or if a file
    already exists at the RSS destination path.

    The feed contains at most RSS_MAX_ENTRIES items, taken from the front
    of the sorted set of nodes whose RSS-Include header is true.

    Raises a UrielError if Canonical-URL, a title (RSS-Title or Title), or
    RSS-Description is missing from the root node, or if RSS-Image-URL
    refers to a local file that does not exist.

    """

    # get the RSS URL
    rss_url = get_rss_url(root_node)

    # if RSS is not enabled, stop here
    if rss_url is None:
        return

    # get the path to the RSS file to write
    rss_file = os.path.join(root_node.get_dest_dir(), rss_url)

    # skip creating the file if there's a static file with the same name
    if os.path.exists(rss_file):
        log("skipping creation of '%s': file exists" %
            (printable_path(rss_file)))
        return

    log("creating '%s'" % (printable_path(rss_file)))

    # Canonical-URL
    if not root_node.has_header("canonical-url"):
        err = "Canonical-URL must be set on node '%s' to write RSS feed"
        raise UrielError(err % (root_node.get_path()))

    # RSS-Title / Title
    if root_node.has_header("rss-title"):
        title = root_node.get_header("rss-title")
    elif root_node.has_header("title"):
        title = root_node.get_header("title")
    else:
        err = "RSS-Title or Title must be set on node '%s' to write RSS feed"
        raise UrielError(err % (root_node.get_path()))

    # RSS-Description
    if root_node.has_header("rss-description"):
        description = root_node.get_header("rss-description")
    else:
        err = "RSS-Description must be set on node '%s' to write RSS feed"
        raise UrielError(err % (root_node.get_path()))

    # get all of the RSS-eligible nodes
    eligible_node_set = set()
    get_eligible_nodes(root_node, "rss-include", False, eligible_node_set)

    # keep only the first RSS_MAX_ENTRIES eligible nodes, in sorted order
    nodes_to_write = sorted(eligible_node_set)[:RSS_MAX_ENTRIES]

    # write the RSS file
    fw = FileWriter(rss_file)

    fw.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
    fw.write("<rss version=\"2.0\">\n")

    # channel
    fw.write("<channel>\n")

    # title
    fw.write("    <title>" + escape(title) + "</title>\n")

    # link
    fw.write("    <link>" + escape(root_node.get_canonical_url()) + \
              "</link>\n")

    # description
    fw.write("    <description>" + escape(description) + "</description>\n")

    # lastBuildDate
    fw.write("    <lastBuildDate>" + \
        escape(get_rfc_2822_date(datetime.datetime.now())) + \
        "</lastBuildDate>\n")

    # image
    if root_node.has_header("rss-image-url"):
        rss_image_url = root_node.get_header("rss-image-url")

        if rss_image_url.startswith("http"):
            # remote RSS image URL: the header value is already a full
            # URL, so it is used as-is
            canonical_rss_image_url = rss_image_url

        else:
            # this isn't a remote RSS image URL, validate the local path
            if rss_image_url.startswith("/"):
                rss_image_url = rss_image_url[1:]

            # get the path to the RSS image on the filesystem
            rss_image_file = os.path.join(root_node.get_dest_dir(),
                                          rss_image_url)

            # if this is a local static file reference, make sure it exists
            if not os.path.exists(rss_image_file):
                raise UrielError(
                    "RSS image file not found: '%s' at path '%s'" % \
                    (rss_image_url, rss_image_file))

            # create canonical RSS image URL
            canonical_rss_image_url = root_node.get_canonical_url() + \
                rss_image_url

        # image
        fw.write("    <image>\n")

        # url
        fw.write("        <url>" + escape(canonical_rss_image_url) + \
            "</url>\n")

        # title
        fw.write("        <title>" + escape(title) + "</title>\n")

        # link
        fw.write("        <link>" + \
            escape(root_node.get_canonical_url()) + "</link>\n")

        # width
        if root_node.has_header("rss-image-width"):
            fw.write("        <width>" + \
                escape(root_node.get_header("rss-image-width")) + \
                "</width>\n")

        # height
        if root_node.has_header("rss-image-height"):
            fw.write("        <height>" + \
                escape(root_node.get_header("rss-image-height")) + \
                "</height>\n")

        fw.write("    </image>\n")

    # nodes
    for node in nodes_to_write:
        # render the node for RSS
        # this involves setting the template for the node to null
        # before rendering it, and then resetting the template back
        # to its original value
        #
        # NOTE(review): a node that had no Template header to begin with
        # is left with Template set to "null" afterwards, because there
        # is nothing to restore it to -- confirm that this is intended
        template = None
        if node.has_header("template"):
            template = node.get_header("template")
        node.set_header("template", "null")

        try:
            # RSS-Add-Node-Title-Header
            add_node_title_header = \
                node.get_boolean_header_value("rss-add-node-title-header",
                                              True)

            # create page
            page = Page(project_root, node, use_canonical_url=True)

            # get node body
            rendered_node_body = page.render().rstrip()
            if add_node_title_header:
                item_description = \
                    "<h1>" + escape(node.get_title()) + "</h1>\n\n" + \
                    rendered_node_body
            else:
                item_description = rendered_node_body

        finally:
            # set template back to its previous value, if any
            # (even if rendering failed)
            if template is not None:
                node.set_header("template", template)

        # item
        fw.write("    <item>\n")

        # title
        # (always escape the title, XML has stricter requirements)
        fw.write("        <title>" + escape(node.get_title()) + \
            "</title>\n")

        # link
        fw.write("        <link>" + escape(node.get_canonical_url()) + \
              "</link>\n")

        # description
        fw.write("        <description>")
        fw.write("<![CDATA[" + escape_xml(item_description) + "]]>")
        fw.write("</description>\n")

        # category
        for tag in sorted(node.get_tags()):
            fw.write("        <category>" + escape(tag) + \
                "</category>\n")

        # pubDate
        # (the created time wins over the modified time, when it is set)
        node_date = node.modified
        if node.created:
            node_date = node.created
        fw.write("        <pubDate>" + \
            escape(get_rfc_2822_date(node_date)) + \
            "</pubDate>\n")

        fw.write("    </item>\n")

    fw.write("</channel>\n")
    fw.write("</rss>\n")

    fw.close()

def get_sitemap_url(root_node):
    """
    Get the value of the Sitemap-URL header from the root node, with one
    leading slash removed (if present).

    Returns None if the root node has no Sitemap-URL header.

    Raises a UrielError if the value contains "../".

    """

    # the sitemap is opt-in
    if not root_node.has_header("sitemap-url"):
        return None

    sitemap_url = root_node.get_header("sitemap-url")

    # the URL is later joined onto the destination directory,
    # so don't let it climb back out
    if "../" in sitemap_url:
        raise UrielError("Sitemap-URL can not contain ../")

    # drop a single leading slash
    return sitemap_url[1:] if sitemap_url.startswith("/") else sitemap_url

def write_sitemap(project_root, root_node):
    """
    Write sitemap file to disk (if enabled).

    Accepts the project root, and the root Node.

    Does nothing if Sitemap-URL is not set on the root node, or if a file
    already exists at the sitemap destination path.

    The sitemap contains at most SITEMAP_MAX_ENTRIES URLs, taken from the
    front of the sorted set of nodes whose Sitemap-Include header is true
    (nodes without the header are included by default). A warning is
    issued if any eligible nodes had to be left out.

    This function only writes the sitemap itself; robots.txt is written
    separately by write_robots_txt().

    Raises a UrielError if Canonical-URL is not set on the root node.

    """

    # get sitemap URL
    sitemap_url = get_sitemap_url(root_node)
    if sitemap_url is None:
        return

    # get the path to the sitemap file to write
    sitemap_file = os.path.join(root_node.get_dest_dir(), sitemap_url)

    # skip creating the file if there's a static file with the same name
    if os.path.exists(sitemap_file):
        log("skipping creation of '%s': file exists" %
            (printable_path(sitemap_file)))
        return

    # get all of the sitemap-eligible nodes
    eligible_node_set = set()
    get_eligible_nodes(root_node, "sitemap-include", True, eligible_node_set)

    log("creating '%s'" % (printable_path(sitemap_file)))

    # Canonical-URL
    if not root_node.has_header("canonical-url"):
        err = "Canonical-URL must be set on node '%s' to write sitemap"
        raise UrielError(err % (root_node.get_path()))

    # sort the eligible nodes once, and keep only as many as a sitemap
    # is allowed to hold
    ordered_nodes = sorted(eligible_node_set)
    if len(ordered_nodes) > SITEMAP_MAX_ENTRIES:
        warn("warning: sitemap contents limited to most recent " + \
            str(SITEMAP_MAX_ENTRIES) + " entries")
    nodes_to_write = ordered_nodes[:SITEMAP_MAX_ENTRIES]

    # write the sitemap
    fw = FileWriter(sitemap_file)
    fw.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
    fw.write("<urlset " + \
        "xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")

    # nodes (already in sorted order)
    for node in nodes_to_write:
        # url
        fw.write("    <url>\n")

        # loc
        fw.write("        <loc>" + escape(node.get_canonical_url()) + \
            "</loc>\n")

        # lastmod
        fw.write("        <lastmod>" + \
            escape_xml(get_w3c_datetime(node.modified)) + \
            "</lastmod>\n")

        fw.write("    </url>\n")

    fw.write("</urlset>\n")
    fw.close()

def write_robots_txt(project_root, root_node):
    """
    Write a minimal robots.txt file that points to the sitemap.

    Accepts the project_root, and the root Node.

    Nothing is written if a robots.txt file already exists in the root
    node's destination directory, or if Sitemap-URL is not enabled.

    """

    robots_txt_file = os.path.join(root_node.get_dest_dir(), "robots.txt")

    # an existing robots.txt always wins
    if os.path.exists(robots_txt_file):
        log("skipping creation of '%s': file exists" %
            (printable_path(robots_txt_file)))
        return

    # the only thing this robots.txt contains is a Sitemap entry,
    # so without a sitemap there is nothing to write
    sitemap_url = get_sitemap_url(root_node)
    if sitemap_url is None:
        return

    # check one more time, right before writing
    if os.path.exists(robots_txt_file):
        return

    # this is just enough to get the sitemap listed
    #
    # a real robots.txt on a production website might be better off
    # using this as a starting point and copying it into static/
    log("creating '%s'" % (printable_path(robots_txt_file)))

    # the sitemap URL has no leading slash,
    # so it is appended directly to the canonical URL
    sitemap_entry = "Sitemap: " + \
        root_node.get_canonical_url() + sitemap_url + "\n"

    writer = FileWriter(robots_txt_file)
    writer.write(sitemap_entry)
    writer.close()

def get_default_template_contents():
    """
    Get the starter template that is written for new projects.

    Returns the whole template as a single newline-separated string,
    ending with a newline.

    """

    template_lines = (
        "<!DOCTYPE html>",
        "<html lang=\"en-US\">",
        "<head>",
        "    <title>{{node:title}}</title>",
        "</head>",
        "",
        "<body>",
        "",
        "<h1>{{node:title}}</h1>",
        "",
        "<p>The value of the <i>Foo</i> header is: ",
        "<b>{{value:foo}}</b></p>",
        "",
        "{{node:body}}",
        "",
        "</body>",
        "</html>",

        # the empty last entry gives the file its trailing newline
        "",
    )

    return "\n".join(template_lines)

def get_default_index_node_contents():
    """
    Get the starter index node that is written for new projects.

    The node has Title and Foo headers, followed by a short HTML body
    that mentions PROGRAM_NAME.

    Returns the whole node as a single newline-separated string, ending
    with a newline.

    """

    node_lines = (
        # headers
        "Title: Hello World",
        "Foo: bar",
        "",

        # body
        "<p>This page is generated from a combination of the ",
        "<i>index</i> node and the <i>default.html</i> ",
        "template.</p>",
        "",
        "<p>Replace this with your own content, etc.</p>",
        "",
        "<p>This page was generated by " + PROGRAM_NAME + "</p>",

        # the empty last entry gives the file its trailing newline
        "",
    )

    return "\n".join(node_lines)

def get_default_soju_contents():
    """
    Get the starter soju.py file that is written for new projects.

    The file consists of a banner, comments listing the symbols and
    variables available to soju functions, and one example function.

    Returns the whole file as a single newline-separated string.

    """

    banner = "#" * 78
    imported_symbols = ("SojuError", "log", "escape")
    available_variables = ("page", "node", "project_root",
                           "use_canonical_url")

    # banner
    lines = [banner, "# soju.py" + (" " * 68) + "#", banner, ""]

    # imported symbols
    lines.append("# The following symbols are imported using magic:")
    lines.append("#")
    lines.append("# import uriel")
    lines.extend("# from uriel import " + symbol
                 for symbol in imported_symbols)
    lines.append("")

    # available variables
    lines.append("# The following variables are available to pass to " + \
                 "functions:")
    lines.append("#")
    lines.extend("# " + variable for variable in available_variables)
    lines.append("")

    # example function, followed by two blank lines
    lines.extend((
        "# {{soju:node_title(node)}}",
        "def node_title(node):",
        "    return escape(node.get_title())",
        "",
        "",
    ))

    return "\n".join(lines)

def get_default_handlers_contents():
    """
    Get the starter handlers.py file that is written for new projects.

    The file consists of a banner, comments listing the symbols available
    to handlers, and a commented-out stub for each handler function.

    Returns the whole file as a single newline-separated string.

    """

    banner = "#" * 78
    imported_symbols = ("Page", "Node", "FileNode", "VirtualNode",
                        "HandlerError", "log", "escape")
    handler_signatures = (
        "init(project_root)",
        "before_render_node_tree(project_root, root_node)",
        "after_render_node_tree(project_root, root_node)",
        "cleanup(project_root, root_node)",
    )

    # banner
    lines = [banner, "# handlers.py" + (" " * 64) + "#", banner, ""]

    # imported symbols
    lines.append("# The following symbols are imported using magic:")
    lines.append("#")
    lines.append("# import uriel")
    lines.extend("# from uriel import " + symbol
                 for symbol in imported_symbols)
    lines.append("")

    # commented-out handler stubs, each followed by a blank line
    for signature in handler_signatures:
        lines.extend(("#def " + signature + ":", "#    pass", ""))

    # one more blank line at the end of the file
    lines.append("")

    return "\n".join(lines)

def get_default_makefile_contents():
    """
    Get the starter Makefile that is written for new projects.

    The Makefile records the absolute path of this program in its URIEL
    variable, and defines the site, clean and preview targets.

    Returns the whole file as a single newline-separated string.

    """

    banner = "#" * 78

    makefile_lines = (
        banner,
        "# uriel project Makefile " + (" " * 52) + "#",
        banner,
        "",

        # variables
        "# path to uriel",
        "URIEL=" + os.path.abspath(__file__),
        "",
        "# uriel project subdirectories",
        "STATIC=static",
        "NODES=nodes",
        "TEMPLATES=templates",
        "PUBLIC=public",
        "LIB=lib",
        "",

        # targets (recipe lines start with a tab)
        banner,
        "# targets " + (" " * 67) + "#",
        banner,
        "",
        ".PHONY: site clean preview",
        "",
        "site:",
        "\t${URIEL} .",
        "",
        "clean:",
        "\trm -rf ${PUBLIC}/",
        "\trm -rf ${LIB}/__pycache__",
        "",
        "preview: site",
        "\tcd ${PUBLIC}/ && python3 -m http.server",
        "",
        "",
    )

    return "\n".join(makefile_lines)

def init_project_root(project_root, project_root_just_created):
    """
    (Re)initialize the rendered web site public directory with only the
    static content, deleting everything else.

    Accepts the project root directory, and a boolean indicating whether the
    project root directory was just created for the first time.

    Create any missing directories and files required for the project:
        nodes       - dynamic node pages (and a default index node)
        templates   - templates to merge with dynamic nodes (and a
                      default template)
        public      - rendered website

    If the project root was just created (and therefore this is the initial
    creation of this project), then some additional non-required but helpful
    directories and files will be created as well:
        static      - static files to include in the website
        lib         - soju.py and handlers.py
        Makefile

    If these optional items are deleted, they will not be recreated when
    uriel is run against the project again.

    Raises a UrielError if a directory could not be created.

    """

    # section root directories
    static = os.path.join(project_root, STATIC_ROOT)
    nodes = os.path.join(project_root, NODES_ROOT)
    templates = os.path.join(project_root, TEMPLATES_ROOT)
    lib = os.path.join(project_root, LIB_ROOT)
    public = os.path.join(project_root, PUBLIC_ROOT)

    # the required project directories are always (re)created,
    # the optional ones only when the project itself is brand new
    project_dirs = [nodes, templates]
    if project_root_just_created:
        project_dirs.extend([static, lib])

    # create all of the project directories that don't exist yet
    for dirname in project_dirs:
        if os.path.isdir(dirname):
            continue

        log("creating '%s'" % (printable_path(dirname)))
        try:
            os.mkdir(dirname)
        except Exception as e:
            err = "could not create directory '%s': '%s'"
            raise UrielError(err % (dirname, str(e))) from e

    # if we have a static directory, copy it over the public directory,
    # deleting any previously generated contents in public
    if os.path.exists(static):
        log("copying '%s' to '%s', overwriting previous contents" %
            (printable_path(static), printable_path(public)))

        # (re)initialize public directory with only the static content
        copy_files_recursive_overwrite(static, public)

    # if we don't have static/, we need to get public/ into a good known
    # state, by deleting it and recreating it
    else:
        log("skipping static file copy, '%s' not found" %
            (printable_path(static)))

        # if we have a public directory, but no static directory, it means
        # that we effectively want to have an empty public directory, since
        # everything that could exist in there would have been created from
        # a previous run. the first step is to delete the public directory.
        if os.path.exists(public):
            log("deleting '%s'" % (printable_path(public)))

            # delete the directory
            delete_directory_recursive(public)

        log("creating '%s'" % (printable_path(public)))

        # create a new public directory
        try:
            os.mkdir(public)
        except Exception as e:
            err = "could not create directory '%s': '%s'"
            raise UrielError(err % (public, str(e))) from e

    # default files, as (path, function that returns the contents) pairs
    #
    # the default template and home page node are always (re)created,
    # the rest only when the project itself is brand new
    default_files = [
        (os.path.join(templates, DEFAULT_TEMPLATE),
         get_default_template_contents),
        (os.path.join(nodes, NODE_INDEX),
         get_default_index_node_contents),
    ]
    if project_root_just_created:
        default_files.extend([
            (os.path.join(lib, "soju.py"),
             get_default_soju_contents),
            (os.path.join(lib, "handlers.py"),
             get_default_handlers_contents),
            (os.path.join(project_root, "Makefile"),
             get_default_makefile_contents),
        ])

    # create all of the default files that don't exist yet
    for (filename, get_contents) in default_files:
        if os.path.exists(filename):
            continue

        log("creating '" + printable_path(filename) + "'")
        fw = FileWriter(filename)
        fw.write(get_contents())
        fw.close()

def get_uriel_module():
    """
    Get the module object for this file, as registered in sys.modules
    under this file's own module name.

    """

    own_name = __name__

    return sys.modules[own_name]

def init_modules(project_root):
    """
    Import the optional user modules (soju and handlers) from the
    project's LIB_ROOT directory.

    Accepts the project root directory.

    If the lib directory exists, it is put at the front of sys.path. Each
    of soju.py and handlers.py is then imported if the file exists, given
    the symbols it is allowed to use as module attributes, and published
    in this module's globals under its own name. Anything that is missing
    is logged and skipped.

    """

    lib_dir = os.path.join(project_root, LIB_ROOT)

    # no lib directory means no user modules at all
    if not os.path.exists(lib_dir):
        log("skipping module initialization, '%s' not found" %
            (printable_path(lib_dir)))
        return

    # let the import statements below find the project's modules first
    sys.path.insert(0, lib_dir)

    # soju
    lib_soju_file = os.path.join(lib_dir, "soju.py")
    if not os.path.exists(lib_soju_file):
        log("skipping module initialization, '%s' not found" %
            (printable_path(lib_soju_file)))
    else:
        log("initializing soju")
        import soju

        # log messages from soju are prefixed with their origin
        def soju_log(s):
            return log("soju: " + str(s))

        # hand soju the symbols it is allowed to use
        soju.uriel = get_uriel_module()
        soju.SojuError = SojuError
        soju.log = soju_log
        soju.escape = escape

        # publish the module for the rest of the program
        globals()["soju"] = soju

    # handlers
    lib_handlers_file = os.path.join(lib_dir, "handlers.py")
    if not os.path.exists(lib_handlers_file):
        log("skipping module initialization, '%s' not found" %
            (printable_path(lib_handlers_file)))
    else:
        log("initializing handlers")
        import handlers

        # log messages from handlers are prefixed with their origin
        def handlers_log(s):
            return log("handler: " + str(s))

        # hand handlers the symbols it is allowed to use
        handlers.uriel = get_uriel_module()
        handlers.Page = Page
        handlers.Node = Node
        handlers.FileNode = FileNode
        handlers.VirtualNode = VirtualNode
        handlers.HandlerError = HandlerError
        handlers.log = handlers_log
        handlers.escape = escape

        # publish the module for the rest of the program
        globals()["handlers"] = handlers

def write_additional_files(project_root, node):
    """
    Write the extra output files once the nodes have been generated.

    Calls, in order: write_rss, write_sitemap, write_robots_txt. Each one
    receives project_root and node.

    """

    # RSS feed first, then the sitemap, then robots.txt
    writers = (write_rss, write_sitemap, write_robots_txt)

    for write_file in writers:
        write_file(project_root, node)

def copy_static_files(project_root):
    """
    Copy the contents of the project's static directory into the public
    directory.

    Returns silently, without copying or logging anything, when the
    static directory does not exist.

    """

    static_dir = os.path.join(project_root, STATIC_ROOT)

    # a missing static directory is skipped without a log message, since
    # the user would already have been told about it earlier in the run
    if os.path.exists(static_dir):
        public_dir = os.path.join(project_root, PUBLIC_ROOT)

        log("copying '%s' to '%s'" %
            (printable_path(static_dir), printable_path(public_dir)))

        copy_files_recursive(static_dir, public_dir)

def create_project_root(project_root):
    """
    Make sure the project root directory exists.

    Returns False if project_root was already there.

    Returns True if project_root was missing and has just been created.

    Raises a UrielError if creating the directory was attempted but failed.

    """

    # nothing to create if the path is already present
    if os.path.exists(project_root):
        return False

    try:
        log("creating '%s'" % (printable_path(project_root)))
        os.mkdir(project_root)
    except Exception:
        raise UrielError("could not create project root '%s'" %
                         (project_root))

    # the directory is brand new
    return True

def call_handler(handler_name, *args):
    """
    Run a user-defined handler, if one is available.

    handler_name is the name of a function in the user's handlers module.
    Any additional positional arguments are passed through to that
    function unchanged.

    Returns without doing anything if the handlers module has not been
    loaded, or if it has no attribute called handler_name.

    If the handler raises a HandlerError, the error is reported via die().
    Any other exception raised by the handler is logged and re-raised.

    """

    # is the handlers module loaded?
    if "handlers" not in globals():
        return

    handlers_module = globals()["handlers"]

    # is the specified handler defined in the handlers module?
    if not hasattr(handlers_module, handler_name):
        return

    # look the handler up by name directly on the module; this avoids
    # building a string of code and passing it to eval()
    handler_function = getattr(handlers_module, handler_name)

    log("running handler: %s" % (handler_name))

    try:
        handler_function(*args)
    except HandlerError as e:
        die("%s handler error: %s" % (handler_name, get_exception_reason(e)))
    except Exception as e:
        log("%s handler error: %s" % (handler_name, get_exception_reason(e)))
        raise

def handle_project(project_root):
    """
    Run the complete build for one project, then exit the program.

    The steps, in order: create the project root directory if it is
    missing, initialize the project root, load the modules under lib/,
    read the node files, augment and render the nodes, write the rendered
    nodes and the additional files, and copy the static files. The user
    handlers init, before_render_node_tree, after_render_node_tree, and
    cleanup are called along the way.

    Exits with EXIT_OK when everything succeeds. A SojuError, UrielError,
    or KeyboardInterrupt raised by any step ends the run with a failure.

    """

    try:
        # make the project root directory, unless it is already there
        is_new_project = create_project_root(project_root)

        # set up the project root
        init_project_root(project_root, is_new_project)

        # load the user modules under lib/
        init_modules(project_root)

        # handler: init
        call_handler("init", project_root)

        # build the in-memory tree of file-based nodes
        log("reading node files")
        root_node = create_file_node_tree(project_root)

        # handler: before_render_node_tree
        call_handler("before_render_node_tree", project_root, root_node)

        # add extra values to the nodes ahead of rendering
        augment_node_tree(project_root, root_node)

        # run the nodes through their templates (in memory)
        log("rendering node content")
        render_node_tree(project_root, root_node)

        # handler: after_render_node_tree
        call_handler("after_render_node_tree", project_root, root_node)

        # put the rendered node pages on disk
        write_dynamic_nodes(project_root, root_node)

        # put the additional files on disk
        write_additional_files(project_root, root_node)

        # bring the static files over to the public directory
        copy_static_files(project_root)

        # handler: cleanup
        call_handler("cleanup", project_root, root_node)

    except SojuError as soju_error:
        log("soju: " + get_exception_reason(soju_error))
        sys_exit(EXIT_FAIL)

    except UrielError as uriel_error:
        die(get_exception_reason(uriel_error))

    except KeyboardInterrupt:
        sys_exit(EXIT_FAIL)

    sys_exit(EXIT_OK)

def main():
    """
    Command line entry point.

    Expects the project root as the one and only command line argument,
    and hands it to handle_project().

    """

    # anything other than exactly one argument gets the usage message
    argument_count = len(sys.argv) - 1
    if argument_count != 1:
        show_usage()

    # the single argument is the project root
    project_root = sys.argv[1]

    # there are no short or long options, so something like -h or --help
    # also gets the usage message
    if project_root.startswith("-"):
        show_usage()

    # run the project
    handle_project(project_root)

# only run main() when this file is executed as a script, not when it is
# imported as a module
if __name__ == "__main__":
    main()
